Custom resource definition
Co-authored-by: Mathieu Lonjaret <mathieu.lonjaret@gmail.com>
This commit is contained in:
parent
cfaf47c8a2
commit
4c060a78cc
1348 changed files with 92364 additions and 55766 deletions
162
vendor/golang.org/x/text/cases/cases.go
generated
vendored
162
vendor/golang.org/x/text/cases/cases.go
generated
vendored
|
@ -1,162 +0,0 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_trieval.go
|
||||
|
||||
// Package cases provides general and language-specific case mappers.
|
||||
package cases // import "golang.org/x/text/cases"
|
||||
|
||||
import (
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// References:
|
||||
// - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18.
|
||||
// - http://www.unicode.org/reports/tr29/
|
||||
// - http://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
|
||||
// - http://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
|
||||
// - http://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
|
||||
// - http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
|
||||
// - http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
|
||||
// - http://userguide.icu-project.org/transforms/casemappings
|
||||
|
||||
// TODO:
|
||||
// - Case folding
|
||||
// - Wide and Narrow?
|
||||
// - Segmenter option for title casing.
|
||||
// - ASCII fast paths
|
||||
// - Encode Soft-Dotted property within trie somehow.
|
||||
|
||||
// A Caser transforms given input to a certain case. It implements
|
||||
// transform.Transformer.
|
||||
//
|
||||
// A Caser may be stateful and should therefore not be shared between
|
||||
// goroutines.
|
||||
type Caser struct {
|
||||
t transform.SpanningTransformer
|
||||
}
|
||||
|
||||
// Bytes returns a new byte slice with the result of converting b to the case
|
||||
// form implemented by c.
|
||||
func (c Caser) Bytes(b []byte) []byte {
|
||||
b, _, _ = transform.Bytes(c.t, b)
|
||||
return b
|
||||
}
|
||||
|
||||
// String returns a string with the result of transforming s to the case form
|
||||
// implemented by c.
|
||||
func (c Caser) String(s string) string {
|
||||
s, _, _ = transform.String(c.t, s)
|
||||
return s
|
||||
}
|
||||
|
||||
// Reset resets the Caser to be reused for new input after a previous call to
|
||||
// Transform.
|
||||
func (c Caser) Reset() { c.t.Reset() }
|
||||
|
||||
// Transform implements the transform.Transformer interface and transforms the
|
||||
// given input to the case form implemented by c.
|
||||
func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
return c.t.Transform(dst, src, atEOF)
|
||||
}
|
||||
|
||||
// Span implements the transform.SpanningTransformer interface.
|
||||
func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
return c.t.Span(src, atEOF)
|
||||
}
|
||||
|
||||
// Upper returns a Caser for language-specific uppercasing.
|
||||
func Upper(t language.Tag, opts ...Option) Caser {
|
||||
return Caser{makeUpper(t, getOpts(opts...))}
|
||||
}
|
||||
|
||||
// Lower returns a Caser for language-specific lowercasing.
|
||||
func Lower(t language.Tag, opts ...Option) Caser {
|
||||
return Caser{makeLower(t, getOpts(opts...))}
|
||||
}
|
||||
|
||||
// Title returns a Caser for language-specific title casing. It uses an
|
||||
// approximation of the default Unicode Word Break algorithm.
|
||||
func Title(t language.Tag, opts ...Option) Caser {
|
||||
return Caser{makeTitle(t, getOpts(opts...))}
|
||||
}
|
||||
|
||||
// Fold returns a Caser that implements Unicode case folding. The returned Caser
|
||||
// is stateless and safe to use concurrently by multiple goroutines.
|
||||
//
|
||||
// Case folding does not normalize the input and may not preserve a normal form.
|
||||
// Use the collate or search package for more convenient and linguistically
|
||||
// sound comparisons. Use golang.org/x/text/secure/precis for string comparisons
|
||||
// where security aspects are a concern.
|
||||
func Fold(opts ...Option) Caser {
|
||||
return Caser{makeFold(getOpts(opts...))}
|
||||
}
|
||||
|
||||
// An Option is used to modify the behavior of a Caser.
|
||||
type Option func(o options) options
|
||||
|
||||
// TODO: consider these options to take a boolean as well, like FinalSigma.
|
||||
// The advantage of using this approach is that other providers of a lower-case
|
||||
// algorithm could set different defaults by prefixing a user-provided slice
|
||||
// of options with their own. This is handy, for instance, for the precis
|
||||
// package which would override the default to not handle the Greek final sigma.
|
||||
|
||||
var (
|
||||
// NoLower disables the lowercasing of non-leading letters for a title
|
||||
// caser.
|
||||
NoLower Option = noLower
|
||||
|
||||
// Compact omits mappings in case folding for characters that would grow the
|
||||
// input. (Unimplemented.)
|
||||
Compact Option = compact
|
||||
)
|
||||
|
||||
// TODO: option to preserve a normal form, if applicable?
|
||||
|
||||
// options holds the settings collected from the Option values passed to a
// Caser constructor.
type options struct {
	// noLower is set by the NoLower option.
	noLower bool
	// simple is set by the Compact option.
	simple bool

	// TODO: segmenter, max ignorable, alternative versions, etc.

	// ignoreFinalSigma is set by HandleFinalSigma(false) and cleared by
	// HandleFinalSigma(true).
	ignoreFinalSigma bool
}
|
||||
|
||||
func getOpts(o ...Option) (res options) {
|
||||
for _, f := range o {
|
||||
res = f(res)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func noLower(o options) options {
|
||||
o.noLower = true
|
||||
return o
|
||||
}
|
||||
|
||||
func compact(o options) options {
|
||||
o.simple = true
|
||||
return o
|
||||
}
|
||||
|
||||
// HandleFinalSigma specifies whether the special handling of Greek final sigma
|
||||
// should be enabled. Unicode prescribes handling the Greek final sigma for all
|
||||
// locales, but standards like IDNA and PRECIS override this default.
|
||||
func HandleFinalSigma(enable bool) Option {
|
||||
if enable {
|
||||
return handleFinalSigma
|
||||
}
|
||||
return ignoreFinalSigma
|
||||
}
|
||||
|
||||
func ignoreFinalSigma(o options) options {
|
||||
o.ignoreFinalSigma = true
|
||||
return o
|
||||
}
|
||||
|
||||
func handleFinalSigma(o options) options {
|
||||
o.ignoreFinalSigma = false
|
||||
return o
|
||||
}
|
376
vendor/golang.org/x/text/cases/context.go
generated
vendored
376
vendor/golang.org/x/text/cases/context.go
generated
vendored
|
@ -1,376 +0,0 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cases
|
||||
|
||||
import "golang.org/x/text/transform"
|
||||
|
||||
// A context is used for iterating over source bytes, fetching case info and
|
||||
// writing to a destination buffer.
|
||||
//
|
||||
// Casing operations may need more than one rune of context to decide how a rune
|
||||
// should be cased. Casing implementations should call checkpoint on context
|
||||
// whenever it is known to be safe to return the runes processed so far.
|
||||
//
|
||||
// It is recommended for implementations to not allow for more than 30 case
|
||||
// ignorables as lookahead (analogous to the limit in norm) and to use state if
|
||||
// unbounded lookahead is needed for cased runes.
|
||||
type context struct {
|
||||
dst, src []byte
|
||||
atEOF bool
|
||||
|
||||
pDst int // pDst points past the last written rune in dst.
|
||||
pSrc int // pSrc points to the start of the currently scanned rune.
|
||||
|
||||
// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
|
||||
nDst, nSrc int
|
||||
err error
|
||||
|
||||
sz int // size of current rune
|
||||
info info // case information of currently scanned rune
|
||||
|
||||
// State preserved across calls to Transform.
|
||||
isMidWord bool // false if next cased letter needs to be title-cased.
|
||||
}
|
||||
|
||||
func (c *context) Reset() {
|
||||
c.isMidWord = false
|
||||
}
|
||||
|
||||
// ret returns the return values for the Transform method. It checks whether
|
||||
// there were insufficient bytes in src to complete and introduces an error
|
||||
// accordingly, if necessary.
|
||||
func (c *context) ret() (nDst, nSrc int, err error) {
|
||||
if c.err != nil || c.nSrc == len(c.src) {
|
||||
return c.nDst, c.nSrc, c.err
|
||||
}
|
||||
// This point is only reached by mappers if there was no short destination
|
||||
// buffer. This means that the source buffer was exhausted and that c.sz was
|
||||
// set to 0 by next.
|
||||
if c.atEOF && c.pSrc == len(c.src) {
|
||||
return c.pDst, c.pSrc, nil
|
||||
}
|
||||
return c.nDst, c.nSrc, transform.ErrShortSrc
|
||||
}
|
||||
|
||||
// retSpan returns the return values for the Span method. It checks whether
|
||||
// there were insufficient bytes in src to complete and introduces an error
|
||||
// accordingly, if necessary.
|
||||
func (c *context) retSpan() (n int, err error) {
|
||||
_, nSrc, err := c.ret()
|
||||
return nSrc, err
|
||||
}
|
||||
|
||||
// checkpoint sets the return value buffer points for Transform to the current
|
||||
// positions.
|
||||
func (c *context) checkpoint() {
|
||||
if c.err == nil {
|
||||
c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
|
||||
}
|
||||
}
|
||||
|
||||
// unreadRune causes the last rune read by next to be reread on the next
|
||||
// invocation of next. Only one unreadRune may be called after a call to next.
|
||||
func (c *context) unreadRune() {
|
||||
c.sz = 0
|
||||
}
|
||||
|
||||
func (c *context) next() bool {
|
||||
c.pSrc += c.sz
|
||||
if c.pSrc == len(c.src) || c.err != nil {
|
||||
c.info, c.sz = 0, 0
|
||||
return false
|
||||
}
|
||||
v, sz := trie.lookup(c.src[c.pSrc:])
|
||||
c.info, c.sz = info(v), sz
|
||||
if c.sz == 0 {
|
||||
if c.atEOF {
|
||||
// A zero size means we have an incomplete rune. If we are atEOF,
|
||||
// this means it is an illegal rune, which we will consume one
|
||||
// byte at a time.
|
||||
c.sz = 1
|
||||
} else {
|
||||
c.err = transform.ErrShortSrc
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// writeBytes adds bytes to dst.
|
||||
func (c *context) writeBytes(b []byte) bool {
|
||||
if len(c.dst)-c.pDst < len(b) {
|
||||
c.err = transform.ErrShortDst
|
||||
return false
|
||||
}
|
||||
// This loop is faster than using copy.
|
||||
for _, ch := range b {
|
||||
c.dst[c.pDst] = ch
|
||||
c.pDst++
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// writeString writes the given string to dst.
|
||||
func (c *context) writeString(s string) bool {
|
||||
if len(c.dst)-c.pDst < len(s) {
|
||||
c.err = transform.ErrShortDst
|
||||
return false
|
||||
}
|
||||
// This loop is faster than using copy.
|
||||
for i := 0; i < len(s); i++ {
|
||||
c.dst[c.pDst] = s[i]
|
||||
c.pDst++
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// copy writes the current rune to dst.
|
||||
func (c *context) copy() bool {
|
||||
return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
|
||||
}
|
||||
|
||||
// copyXOR copies the current rune to dst and modifies it by applying the XOR
|
||||
// pattern of the case info. It is the responsibility of the caller to ensure
|
||||
// that this is a rune with a XOR pattern defined.
|
||||
func (c *context) copyXOR() bool {
|
||||
if !c.copy() {
|
||||
return false
|
||||
}
|
||||
if c.info&xorIndexBit == 0 {
|
||||
// Fast path for 6-bit XOR pattern, which covers most cases.
|
||||
c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
|
||||
} else {
|
||||
// Interpret XOR bits as an index.
|
||||
// TODO: test performance for unrolling this loop. Verify that we have
|
||||
// at least two bytes and at most three.
|
||||
idx := c.info >> xorShift
|
||||
for p := c.pDst - 1; ; p-- {
|
||||
c.dst[p] ^= xorData[idx]
|
||||
idx--
|
||||
if xorData[idx] == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// hasPrefix returns true if src[pSrc:] starts with the given string.
|
||||
func (c *context) hasPrefix(s string) bool {
|
||||
b := c.src[c.pSrc:]
|
||||
if len(b) < len(s) {
|
||||
return false
|
||||
}
|
||||
for i, c := range b[:len(s)] {
|
||||
if c != s[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// caseType returns an info with only the case bits, normalized to either
|
||||
// cLower, cUpper, cTitle or cUncased.
|
||||
func (c *context) caseType() info {
|
||||
cm := c.info & 0x7
|
||||
if cm < 4 {
|
||||
return cm
|
||||
}
|
||||
if cm >= cXORCase {
|
||||
// xor the last bit of the rune with the case type bits.
|
||||
b := c.src[c.pSrc+c.sz-1]
|
||||
return info(b&1) ^ cm&0x3
|
||||
}
|
||||
if cm == cIgnorableCased {
|
||||
return cLower
|
||||
}
|
||||
return cUncased
|
||||
}
|
||||
|
||||
// lower writes the lowercase version of the current rune to dst.
|
||||
func lower(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cLower {
|
||||
return c.copy()
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
return c.copyXOR()
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||||
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
|
||||
return c.writeString(e[offset : offset+nLower])
|
||||
}
|
||||
return c.copy()
|
||||
}
|
||||
|
||||
func isLower(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cLower {
|
||||
return true
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// upper writes the uppercase version of the current rune to dst.
|
||||
func upper(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cUpper {
|
||||
return c.copy()
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
return c.copyXOR()
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||||
// Get length of first special case mapping.
|
||||
n := (e[1] >> lengthBits) & lengthMask
|
||||
if ct == cTitle {
|
||||
// The first special case mapping is for lower. Set n to the second.
|
||||
if n == noChange {
|
||||
n = 0
|
||||
}
|
||||
n, e = e[1]&lengthMask, e[n:]
|
||||
}
|
||||
if n != noChange {
|
||||
return c.writeString(e[offset : offset+n])
|
||||
}
|
||||
return c.copy()
|
||||
}
|
||||
|
||||
// isUpper writes the isUppercase version of the current rune to dst.
|
||||
func isUpper(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cUpper {
|
||||
return true
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
// Get length of first special case mapping.
|
||||
n := (e[1] >> lengthBits) & lengthMask
|
||||
if ct == cTitle {
|
||||
n = e[1] & lengthMask
|
||||
}
|
||||
if n != noChange {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// title writes the title case version of the current rune to dst.
|
||||
func title(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cTitle {
|
||||
return c.copy()
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
if ct == cLower {
|
||||
return c.copyXOR()
|
||||
}
|
||||
return c.copy()
|
||||
}
|
||||
// Get the exception data.
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||||
|
||||
nFirst := (e[1] >> lengthBits) & lengthMask
|
||||
if nTitle := e[1] & lengthMask; nTitle != noChange {
|
||||
if nFirst != noChange {
|
||||
e = e[nFirst:]
|
||||
}
|
||||
return c.writeString(e[offset : offset+nTitle])
|
||||
}
|
||||
if ct == cLower && nFirst != noChange {
|
||||
// Use the uppercase version instead.
|
||||
return c.writeString(e[offset : offset+nFirst])
|
||||
}
|
||||
// Already in correct case.
|
||||
return c.copy()
|
||||
}
|
||||
|
||||
// isTitle reports whether the current rune is in title case.
|
||||
func isTitle(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cTitle {
|
||||
return true
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
if ct == cLower {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
// Get the exception data.
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
if nTitle := e[1] & lengthMask; nTitle != noChange {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
nFirst := (e[1] >> lengthBits) & lengthMask
|
||||
if ct == cLower && nFirst != noChange {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// foldFull writes the foldFull version of the current rune to dst.
|
||||
func foldFull(c *context) bool {
|
||||
if c.info&hasMappingMask == 0 {
|
||||
return c.copy()
|
||||
}
|
||||
ct := c.caseType()
|
||||
if c.info&exceptionBit == 0 {
|
||||
if ct != cLower || c.info&inverseFoldBit != 0 {
|
||||
return c.copyXOR()
|
||||
}
|
||||
return c.copy()
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
n := e[0] & lengthMask
|
||||
if n == 0 {
|
||||
if ct == cLower {
|
||||
return c.copy()
|
||||
}
|
||||
n = (e[1] >> lengthBits) & lengthMask
|
||||
}
|
||||
return c.writeString(e[2 : 2+n])
|
||||
}
|
||||
|
||||
// isFoldFull reports whether the current run is mapped to foldFull
|
||||
func isFoldFull(c *context) bool {
|
||||
if c.info&hasMappingMask == 0 {
|
||||
return true
|
||||
}
|
||||
ct := c.caseType()
|
||||
if c.info&exceptionBit == 0 {
|
||||
if ct != cLower || c.info&inverseFoldBit != 0 {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
n := e[0] & lengthMask
|
||||
if n == 0 && ct == cLower {
|
||||
return true
|
||||
}
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
34
vendor/golang.org/x/text/cases/fold.go
generated
vendored
34
vendor/golang.org/x/text/cases/fold.go
generated
vendored
|
@ -1,34 +0,0 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cases
|
||||
|
||||
import "golang.org/x/text/transform"
|
||||
|
||||
// caseFolder is the transformer returned by makeFold. It embeds
// transform.NopResetter to provide its Reset method.
type caseFolder struct {
	transform.NopResetter
}
|
||||
|
||||
// caseFolder implements the Transformer interface for doing case folding.
|
||||
func (t *caseFolder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
for c.next() {
|
||||
foldFull(&c)
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
func (t *caseFolder) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && isFoldFull(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
func makeFold(o options) transform.SpanningTransformer {
|
||||
// TODO: Special case folding, through option Language, Special/Turkic, or
|
||||
// both.
|
||||
// TODO: Implement Compact options.
|
||||
return &caseFolder{}
|
||||
}
|
839
vendor/golang.org/x/text/cases/gen.go
generated
vendored
839
vendor/golang.org/x/text/cases/gen.go
generated
vendored
|
@ -1,839 +0,0 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// This program generates the trie for casing operations. The Unicode casing
|
||||
// algorithm requires the lookup of various properties and mappings for each
|
||||
// rune. The table generated by this generator combines several of the most
|
||||
// frequently used of these into a single trie so that they can be accessed
|
||||
// with a single lookup.
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/triegen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
genTables()
|
||||
genTablesTest()
|
||||
gen.Repackage("gen_trieval.go", "trieval.go", "cases")
|
||||
}
|
||||
|
||||
// runeInfo contains all information for a rune that we care about for casing
|
||||
// operations.
|
||||
type runeInfo struct {
|
||||
Rune rune
|
||||
|
||||
entry info // trie value for this rune.
|
||||
|
||||
CaseMode info
|
||||
|
||||
// Simple case mappings.
|
||||
Simple [1 + maxCaseMode][]rune
|
||||
|
||||
// Special casing
|
||||
HasSpecial bool
|
||||
Conditional bool
|
||||
Special [1 + maxCaseMode][]rune
|
||||
|
||||
// Folding
|
||||
FoldSimple rune
|
||||
FoldSpecial rune
|
||||
FoldFull []rune
|
||||
|
||||
// TODO: FC_NFKC, or equivalent data.
|
||||
|
||||
// Properties
|
||||
SoftDotted bool
|
||||
CaseIgnorable bool
|
||||
Cased bool
|
||||
DecomposeGreek bool
|
||||
BreakType string
|
||||
BreakCat breakCategory
|
||||
|
||||
// We care mostly about 0, Above, and IotaSubscript.
|
||||
CCC byte
|
||||
}
|
||||
|
||||
// breakCategory is the collapsed word-break category of a rune.
type breakCategory int

// The break categories. breakBreak is the zero value and therefore the
// category of any rune that is not assigned one of the others.
const (
	breakBreak breakCategory = iota
	breakLetter
	breakMid
)
|
||||
|
||||
// mapping returns the case mapping for the given case type.
|
||||
func (r *runeInfo) mapping(c info) string {
|
||||
if r.HasSpecial {
|
||||
return string(r.Special[c])
|
||||
}
|
||||
if len(r.Simple[c]) != 0 {
|
||||
return string(r.Simple[c])
|
||||
}
|
||||
return string(r.Rune)
|
||||
}
|
||||
|
||||
func parse(file string, f func(p *ucd.Parser)) {
|
||||
ucd.Parse(gen.OpenUCDFile(file), f)
|
||||
}
|
||||
|
||||
func parseUCD() []runeInfo {
|
||||
chars := make([]runeInfo, unicode.MaxRune)
|
||||
|
||||
get := func(r rune) *runeInfo {
|
||||
c := &chars[r]
|
||||
c.Rune = r
|
||||
return c
|
||||
}
|
||||
|
||||
parse("UnicodeData.txt", func(p *ucd.Parser) {
|
||||
ri := get(p.Rune(0))
|
||||
ri.CCC = byte(p.Int(ucd.CanonicalCombiningClass))
|
||||
ri.Simple[cLower] = p.Runes(ucd.SimpleLowercaseMapping)
|
||||
ri.Simple[cUpper] = p.Runes(ucd.SimpleUppercaseMapping)
|
||||
ri.Simple[cTitle] = p.Runes(ucd.SimpleTitlecaseMapping)
|
||||
if p.String(ucd.GeneralCategory) == "Lt" {
|
||||
ri.CaseMode = cTitle
|
||||
}
|
||||
})
|
||||
|
||||
// <code>; <property>
|
||||
parse("PropList.txt", func(p *ucd.Parser) {
|
||||
if p.String(1) == "Soft_Dotted" {
|
||||
chars[p.Rune(0)].SoftDotted = true
|
||||
}
|
||||
})
|
||||
|
||||
// <code>; <word break type>
|
||||
parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
|
||||
ri := get(p.Rune(0))
|
||||
switch p.String(1) {
|
||||
case "Case_Ignorable":
|
||||
ri.CaseIgnorable = true
|
||||
case "Cased":
|
||||
ri.Cased = true
|
||||
case "Lowercase":
|
||||
ri.CaseMode = cLower
|
||||
case "Uppercase":
|
||||
ri.CaseMode = cUpper
|
||||
}
|
||||
})
|
||||
|
||||
// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
|
||||
parse("SpecialCasing.txt", func(p *ucd.Parser) {
|
||||
// We drop all conditional special casing and deal with them manually in
|
||||
// the language-specific case mappers. Rune 0x03A3 is the only one with
|
||||
// a conditional formatting that is not language-specific. However,
|
||||
// dealing with this letter is tricky, especially in a streaming
|
||||
// context, so we deal with it in the Caser for Greek specifically.
|
||||
ri := get(p.Rune(0))
|
||||
if p.String(4) == "" {
|
||||
ri.HasSpecial = true
|
||||
ri.Special[cLower] = p.Runes(1)
|
||||
ri.Special[cTitle] = p.Runes(2)
|
||||
ri.Special[cUpper] = p.Runes(3)
|
||||
} else {
|
||||
ri.Conditional = true
|
||||
}
|
||||
})
|
||||
|
||||
// TODO: Use text breaking according to UAX #29.
|
||||
// <code>; <word break type>
|
||||
parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
|
||||
ri := get(p.Rune(0))
|
||||
ri.BreakType = p.String(1)
|
||||
|
||||
// We collapse the word breaking properties onto the categories we need.
|
||||
switch p.String(1) { // TODO: officially we need to canonicalize.
|
||||
case "MidLetter", "MidNumLet", "Single_Quote":
|
||||
ri.BreakCat = breakMid
|
||||
if !ri.CaseIgnorable {
|
||||
// finalSigma relies on the fact that all breakMid runes are
|
||||
// also a Case_Ignorable. Revisit this code when this changes.
|
||||
log.Fatalf("Rune %U, which has a break category mid, is not a case ignorable", ri)
|
||||
}
|
||||
case "ALetter", "Hebrew_Letter", "Numeric", "Extend", "ExtendNumLet", "Format", "ZWJ":
|
||||
ri.BreakCat = breakLetter
|
||||
}
|
||||
})
|
||||
|
||||
// <code>; <type>; <mapping>
|
||||
parse("CaseFolding.txt", func(p *ucd.Parser) {
|
||||
ri := get(p.Rune(0))
|
||||
switch p.String(1) {
|
||||
case "C":
|
||||
ri.FoldSimple = p.Rune(2)
|
||||
ri.FoldFull = p.Runes(2)
|
||||
case "S":
|
||||
ri.FoldSimple = p.Rune(2)
|
||||
case "T":
|
||||
ri.FoldSpecial = p.Rune(2)
|
||||
case "F":
|
||||
ri.FoldFull = p.Runes(2)
|
||||
default:
|
||||
log.Fatalf("%U: unknown type: %s", p.Rune(0), p.String(1))
|
||||
}
|
||||
})
|
||||
|
||||
return chars
|
||||
}
|
||||
|
||||
func genTables() {
|
||||
chars := parseUCD()
|
||||
verifyProperties(chars)
|
||||
|
||||
t := triegen.NewTrie("case")
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
makeEntry(c)
|
||||
t.Insert(rune(i), uint64(c.entry))
|
||||
}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "cases")
|
||||
|
||||
gen.WriteUnicodeVersion(w)
|
||||
|
||||
// TODO: write CLDR version after adding a mechanism to detect that the
|
||||
// tables on which the manually created locale-sensitive casing code is
|
||||
// based hasn't changed.
|
||||
|
||||
w.WriteVar("xorData", string(xorData))
|
||||
w.WriteVar("exceptions", string(exceptionData))
|
||||
|
||||
sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{}))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
w.Size += sz
|
||||
}
|
||||
|
||||
func makeEntry(ri *runeInfo) {
|
||||
if ri.CaseIgnorable {
|
||||
if ri.Cased {
|
||||
ri.entry = cIgnorableCased
|
||||
} else {
|
||||
ri.entry = cIgnorableUncased
|
||||
}
|
||||
} else {
|
||||
ri.entry = ri.CaseMode
|
||||
}
|
||||
|
||||
// TODO: handle soft-dotted.
|
||||
|
||||
ccc := cccOther
|
||||
switch ri.CCC {
|
||||
case 0: // Not_Reordered
|
||||
ccc = cccZero
|
||||
case above: // Above
|
||||
ccc = cccAbove
|
||||
}
|
||||
switch ri.BreakCat {
|
||||
case breakBreak:
|
||||
ccc = cccBreak
|
||||
case breakMid:
|
||||
ri.entry |= isMidBit
|
||||
}
|
||||
|
||||
ri.entry |= ccc
|
||||
|
||||
if ri.CaseMode == cUncased {
|
||||
return
|
||||
}
|
||||
|
||||
// Need to do something special.
|
||||
if ri.CaseMode == cTitle || ri.HasSpecial || ri.mapping(cTitle) != ri.mapping(cUpper) {
|
||||
makeException(ri)
|
||||
return
|
||||
}
|
||||
if f := string(ri.FoldFull); len(f) > 0 && f != ri.mapping(cUpper) && f != ri.mapping(cLower) {
|
||||
makeException(ri)
|
||||
return
|
||||
}
|
||||
|
||||
// Rune is either lowercase or uppercase.
|
||||
|
||||
orig := string(ri.Rune)
|
||||
mapped := ""
|
||||
if ri.CaseMode == cUpper {
|
||||
mapped = ri.mapping(cLower)
|
||||
} else {
|
||||
mapped = ri.mapping(cUpper)
|
||||
}
|
||||
|
||||
if len(orig) != len(mapped) {
|
||||
makeException(ri)
|
||||
return
|
||||
}
|
||||
|
||||
if string(ri.FoldFull) == ri.mapping(cUpper) {
|
||||
ri.entry |= inverseFoldBit
|
||||
}
|
||||
|
||||
n := len(orig)
|
||||
|
||||
// Create per-byte XOR mask.
|
||||
var b []byte
|
||||
for i := 0; i < n; i++ {
|
||||
b = append(b, orig[i]^mapped[i])
|
||||
}
|
||||
|
||||
// Remove leading 0 bytes, but keep at least one byte.
|
||||
for ; len(b) > 1 && b[0] == 0; b = b[1:] {
|
||||
}
|
||||
|
||||
if len(b) == 1 && b[0]&0xc0 == 0 {
|
||||
ri.entry |= info(b[0]) << xorShift
|
||||
return
|
||||
}
|
||||
|
||||
key := string(b)
|
||||
x, ok := xorCache[key]
|
||||
if !ok {
|
||||
xorData = append(xorData, 0) // for detecting start of sequence
|
||||
xorData = append(xorData, b...)
|
||||
|
||||
x = len(xorData) - 1
|
||||
xorCache[key] = x
|
||||
}
|
||||
ri.entry |= info(x<<xorShift) | xorIndexBit
|
||||
}
|
||||
|
||||
var (
	// xorCache maps an XOR mask to the index of its last byte in xorData, so
	// that identical masks are stored only once.
	xorCache = map[string]int{}

	// xorData holds byte-wise XOR data for the least significant bytes of a
	// UTF-8 encoded rune. An index points to the last byte of a sequence,
	// and every sequence is preceded by a zero terminator.
	xorData = []byte{}

	// exceptionData is the data behind the exceptions table. See the
	// comments in gen_trieval.go re "the exceptions slice".
	exceptionData = []byte{0}
)
|
||||
|
||||
// makeException encodes case mappings that cannot be expressed in a simple
|
||||
// XOR diff.
|
||||
func makeException(ri *runeInfo) {
|
||||
ccc := ri.entry & cccMask
|
||||
// Set exception bit and retain case type.
|
||||
ri.entry &= 0x0007
|
||||
ri.entry |= exceptionBit
|
||||
|
||||
if len(exceptionData) >= 1<<numExceptionBits {
|
||||
log.Fatalf("%U:exceptionData too large %x > %d bits", ri.Rune, len(exceptionData), numExceptionBits)
|
||||
}
|
||||
|
||||
// Set the offset in the exceptionData array.
|
||||
ri.entry |= info(len(exceptionData) << exceptionShift)
|
||||
|
||||
orig := string(ri.Rune)
|
||||
tc := ri.mapping(cTitle)
|
||||
uc := ri.mapping(cUpper)
|
||||
lc := ri.mapping(cLower)
|
||||
ff := string(ri.FoldFull)
|
||||
|
||||
// addString sets the length of a string and adds it to the expansions array.
|
||||
addString := func(s string, b *byte) {
|
||||
if len(s) == 0 {
|
||||
// Zero-length mappings exist, but only for conditional casing,
|
||||
// which we are representing outside of this table.
|
||||
log.Fatalf("%U: has zero-length mapping.", ri.Rune)
|
||||
}
|
||||
*b <<= 3
|
||||
if s != orig {
|
||||
n := len(s)
|
||||
if n > 7 {
|
||||
log.Fatalf("%U: mapping larger than 7 (%d)", ri.Rune, n)
|
||||
}
|
||||
*b |= byte(n)
|
||||
exceptionData = append(exceptionData, s...)
|
||||
}
|
||||
}
|
||||
|
||||
// byte 0:
|
||||
exceptionData = append(exceptionData, byte(ccc)|byte(len(ff)))
|
||||
|
||||
// byte 1:
|
||||
p := len(exceptionData)
|
||||
exceptionData = append(exceptionData, 0)
|
||||
|
||||
if len(ff) > 7 { // May be zero-length.
|
||||
log.Fatalf("%U: fold string larger than 7 (%d)", ri.Rune, len(ff))
|
||||
}
|
||||
exceptionData = append(exceptionData, ff...)
|
||||
ct := ri.CaseMode
|
||||
if ct != cLower {
|
||||
addString(lc, &exceptionData[p])
|
||||
}
|
||||
if ct != cUpper {
|
||||
addString(uc, &exceptionData[p])
|
||||
}
|
||||
if ct != cTitle {
|
||||
// If title is the same as upper, we set it to the original string so
|
||||
// that it will be marked as not present. This implies title case is
|
||||
// the same as upper case.
|
||||
if tc == uc {
|
||||
tc = orig
|
||||
}
|
||||
addString(tc, &exceptionData[p])
|
||||
}
|
||||
}
|
||||
|
||||
// sparseCompacter is a trie value block Compacter. Successive runes often
// alternate between lower and upper case. The compacter exploits this with a
// special case type whose case value is derived by combining it with the
// least-significant bit of the rune, which creates large ranges of equal
// case values that compress well.
type sparseCompacter struct {
	sparseBlocks  [][]uint16
	sparseOffsets []uint16
	sparseCount   int
}
|
||||
|
||||
// makeSparse returns the number of elements that compact block would contain
|
||||
// as well as the modified values.
|
||||
func makeSparse(vals []uint64) ([]uint16, int) {
|
||||
// Copy the values.
|
||||
values := make([]uint16, len(vals))
|
||||
for i, v := range vals {
|
||||
values[i] = uint16(v)
|
||||
}
|
||||
|
||||
alt := func(i int, v uint16) uint16 {
|
||||
if cm := info(v & fullCasedMask); cm == cUpper || cm == cLower {
|
||||
// Convert cLower or cUpper to cXORCase value, which has the form 11x.
|
||||
xor := v
|
||||
xor &^= 1
|
||||
xor |= uint16(i&1) ^ (v & 1)
|
||||
xor |= 0x4
|
||||
return xor
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
var count int
|
||||
var previous uint16
|
||||
for i, v := range values {
|
||||
if v != 0 {
|
||||
// Try if the unmodified value is equal to the previous.
|
||||
if v == previous {
|
||||
continue
|
||||
}
|
||||
|
||||
// Try if the xor-ed value is equal to the previous value.
|
||||
a := alt(i, v)
|
||||
if a == previous {
|
||||
values[i] = a
|
||||
continue
|
||||
}
|
||||
|
||||
// This is a new value.
|
||||
count++
|
||||
|
||||
// Use the xor-ed value if it will be identical to the next value.
|
||||
if p := i + 1; p < len(values) && alt(p, values[p]) == a {
|
||||
values[i] = a
|
||||
v = a
|
||||
}
|
||||
}
|
||||
previous = v
|
||||
}
|
||||
return values, count
|
||||
}
|
||||
|
||||
func (s *sparseCompacter) Size(v []uint64) (int, bool) {
|
||||
_, n := makeSparse(v)
|
||||
|
||||
// We limit using this method to having 16 entries.
|
||||
if n > 16 {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
return 2 + int(reflect.TypeOf(valueRange{}).Size())*n, true
|
||||
}
|
||||
|
||||
func (s *sparseCompacter) Store(v []uint64) uint32 {
|
||||
h := uint32(len(s.sparseOffsets))
|
||||
values, sz := makeSparse(v)
|
||||
s.sparseBlocks = append(s.sparseBlocks, values)
|
||||
s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
|
||||
s.sparseCount += sz
|
||||
return h
|
||||
}
|
||||
|
||||
func (s *sparseCompacter) Handler() string {
|
||||
// The sparse global variable and its lookup method is defined in gen_trieval.go.
|
||||
return "sparse.lookup"
|
||||
}
|
||||
|
||||
func (s *sparseCompacter) Print(w io.Writer) (retErr error) {
|
||||
p := func(format string, args ...interface{}) {
|
||||
_, err := fmt.Fprintf(w, format, args...)
|
||||
if retErr == nil && err != nil {
|
||||
retErr = err
|
||||
}
|
||||
}
|
||||
|
||||
ls := len(s.sparseBlocks)
|
||||
if ls == len(s.sparseOffsets) {
|
||||
s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
|
||||
}
|
||||
p("// sparseOffsets: %d entries, %d bytes\n", ls+1, (ls+1)*2)
|
||||
p("var sparseOffsets = %#v\n\n", s.sparseOffsets)
|
||||
|
||||
ns := s.sparseCount
|
||||
p("// sparseValues: %d entries, %d bytes\n", ns, ns*4)
|
||||
p("var sparseValues = [%d]valueRange {", ns)
|
||||
for i, values := range s.sparseBlocks {
|
||||
p("\n// Block %#x, offset %#x", i, s.sparseOffsets[i])
|
||||
var v uint16
|
||||
for i, nv := range values {
|
||||
if nv != v {
|
||||
if v != 0 {
|
||||
p(",hi:%#02x},", 0x80+i-1)
|
||||
}
|
||||
if nv != 0 {
|
||||
p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
|
||||
}
|
||||
}
|
||||
v = nv
|
||||
}
|
||||
if v != 0 {
|
||||
p(",hi:%#02x},", 0x80+len(values)-1)
|
||||
}
|
||||
}
|
||||
p("\n}\n\n")
|
||||
return
|
||||
}
|
||||
|
||||
// verifyProperties that properties of the runes that are relied upon in the
|
||||
// implementation. Each property is marked with an identifier that is referred
|
||||
// to in the places where it is used.
|
||||
func verifyProperties(chars []runeInfo) {
|
||||
for i, c := range chars {
|
||||
r := rune(i)
|
||||
|
||||
// Rune properties.
|
||||
|
||||
// A.1: modifier never changes on lowercase. [ltLower]
|
||||
if c.CCC > 0 && unicode.ToLower(r) != r {
|
||||
log.Fatalf("%U: non-starter changes when lowercased", r)
|
||||
}
|
||||
|
||||
// A.2: properties of decompositions starting with I or J. [ltLower]
|
||||
d := norm.NFD.PropertiesString(string(r)).Decomposition()
|
||||
if len(d) > 0 {
|
||||
if d[0] == 'I' || d[0] == 'J' {
|
||||
// A.2.1: we expect at least an ASCII character and a modifier.
|
||||
if len(d) < 3 {
|
||||
log.Fatalf("%U: length of decomposition was %d; want >= 3", r, len(d))
|
||||
}
|
||||
|
||||
// All subsequent runes are modifiers and all have the same CCC.
|
||||
runes := []rune(string(d[1:]))
|
||||
ccc := chars[runes[0]].CCC
|
||||
|
||||
for _, mr := range runes[1:] {
|
||||
mc := chars[mr]
|
||||
|
||||
// A.2.2: all modifiers have a CCC of Above or less.
|
||||
if ccc == 0 || ccc > above {
|
||||
log.Fatalf("%U: CCC of successive rune (%U) was %d; want (0,230]", r, mr, ccc)
|
||||
}
|
||||
|
||||
// A.2.3: a sequence of modifiers all have the same CCC.
|
||||
if mc.CCC != ccc {
|
||||
log.Fatalf("%U: CCC of follow-up modifier (%U) was %d; want %d", r, mr, mc.CCC, ccc)
|
||||
}
|
||||
|
||||
// A.2.4: for each trailing r, r in [0x300, 0x311] <=> CCC == Above.
|
||||
if (ccc == above) != (0x300 <= mr && mr <= 0x311) {
|
||||
log.Fatalf("%U: modifier %U in [U+0300, U+0311] != ccc(%U) == 230", r, mr, mr)
|
||||
}
|
||||
|
||||
if i += len(string(mr)); i >= len(d) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A.3: no U+0307 in decomposition of Soft-Dotted rune. [ltUpper]
|
||||
if unicode.Is(unicode.Soft_Dotted, r) && strings.Contains(string(d), "\u0307") {
|
||||
log.Fatalf("%U: decomposition of soft-dotted rune may not contain U+0307", r)
|
||||
}
|
||||
|
||||
// A.4: only rune U+0345 may be of CCC Iota_Subscript. [elUpper]
|
||||
if c.CCC == iotaSubscript && r != 0x0345 {
|
||||
log.Fatalf("%U: only rune U+0345 may have CCC Iota_Subscript", r)
|
||||
}
|
||||
|
||||
// A.5: soft-dotted runes do not have exceptions.
|
||||
if c.SoftDotted && c.entry&exceptionBit != 0 {
|
||||
log.Fatalf("%U: soft-dotted has exception", r)
|
||||
}
|
||||
|
||||
// A.6: Greek decomposition. [elUpper]
|
||||
if unicode.Is(unicode.Greek, r) {
|
||||
if b := norm.NFD.PropertiesString(string(r)).Decomposition(); b != nil {
|
||||
runes := []rune(string(b))
|
||||
// A.6.1: If a Greek rune decomposes and the first rune of the
|
||||
// decomposition is greater than U+00FF, the rune is always
|
||||
// great and not a modifier.
|
||||
if f := runes[0]; unicode.IsMark(f) || f > 0xFF && !unicode.Is(unicode.Greek, f) {
|
||||
log.Fatalf("%U: expeced first rune of Greek decomposition to be letter, found %U", r, f)
|
||||
}
|
||||
// A.6.2: Any follow-up rune in a Greek decomposition is a
|
||||
// modifier of which the first should be gobbled in
|
||||
// decomposition.
|
||||
for _, m := range runes[1:] {
|
||||
switch m {
|
||||
case 0x0313, 0x0314, 0x0301, 0x0300, 0x0306, 0x0342, 0x0308, 0x0304, 0x345:
|
||||
default:
|
||||
log.Fatalf("%U: modifier %U is outside of expeced Greek modifier set", r, m)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Breaking properties.
|
||||
|
||||
// B.1: all runes with CCC > 0 are of break type Extend.
|
||||
if c.CCC > 0 && c.BreakType != "Extend" {
|
||||
log.Fatalf("%U: CCC == %d, but got break type %s; want Extend", r, c.CCC, c.BreakType)
|
||||
}
|
||||
|
||||
// B.2: all cased runes with c.CCC == 0 are of break type ALetter.
|
||||
if c.CCC == 0 && c.Cased && c.BreakType != "ALetter" {
|
||||
log.Fatalf("%U: cased, but got break type %s; want ALetter", r, c.BreakType)
|
||||
}
|
||||
|
||||
// B.3: letter category.
|
||||
if c.CCC == 0 && c.BreakCat != breakBreak && !c.CaseIgnorable {
|
||||
if c.BreakCat != breakLetter {
|
||||
log.Fatalf("%U: check for letter break type gave %d; want %d", r, c.BreakCat, breakLetter)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// genTablesTest generates tables_test.go for package cases. The file holds a
// single var block with the reference data: the Case_Ignorable runes not
// covered by verifyIgnore, the unconditional special casings, the case
// folding map, the break ranges and the filtered word break samples.
func genTablesTest() {
	w := &bytes.Buffer{}

	fmt.Fprintln(w, "var (")
	printProperties(w, "DerivedCoreProperties.txt", "Case_Ignorable", verifyIgnore)

	// We discard the output as we know we have perfect functions. We run them
	// just to verify the properties are correct.
	n := printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Cased", verifyCased)
	n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Lowercase", verifyLower)
	n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Uppercase", verifyUpper)
	if n > 0 {
		log.Fatalf("One of the discarded properties does not have a perfect filter.")
	}

	// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
	fmt.Fprintln(w, "\tspecial = map[rune]struct{ toLower, toTitle, toUpper string }{")
	parse("SpecialCasing.txt", func(p *ucd.Parser) {
		// Skip conditional entries.
		if p.String(4) != "" {
			return
		}
		r := p.Rune(0)
		fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n",
			r, string(p.Runes(1)), string(p.Runes(2)), string(p.Runes(3)))
	})
	fmt.Fprint(w, "\t}\n\n")

	// <code>; <type>; <runes>
	table := map[rune]struct{ simple, full, special string }{}
	parse("CaseFolding.txt", func(p *ucd.Parser) {
		r := p.Rune(0)
		t := p.String(1)
		v := string(p.Runes(2))
		// Entries that fold to the lower-case mapping of unicode.ToLower are
		// left out, except for entries of type T.
		if t != "T" && v == string(unicode.ToLower(r)) {
			return
		}
		x := table[r]
		switch t {
		case "C":
			// A type C entry sets both the simple and the full folding.
			x.full = v
			x.simple = v
		case "S":
			x.simple = v
		case "F":
			x.full = v
		case "T":
			x.special = v
		}
		table[r] = x
	})
	fmt.Fprintln(w, "\tfoldMap = map[rune]struct{ simple, full, special string }{")
	// Walk the code points in increasing order instead of ranging over the
	// map, so that the entries are written in rune order.
	// NOTE(review): the bound excludes U+10FFFF itself.
	for r := rune(0); r < 0x10FFFF; r++ {
		x, ok := table[r]
		if !ok {
			continue
		}
		fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n", r, x.simple, x.full, x.special)
	}
	fmt.Fprint(w, "\t}\n\n")

	// Break property
	notBreak := map[rune]bool{}
	parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
		switch p.String(1) {
		case "Extend", "Format", "MidLetter", "MidNumLet", "Single_Quote",
			"ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet", "ZWJ":
			notBreak[p.Rune(0)] = true
		}
	})

	// Write the maximal ranges {lo, hi} of runes that are not in notBreak.
	fmt.Fprintln(w, "\tbreakProp = []struct{ lo, hi rune }{")
	inBreak := false
	for r := rune(0); r <= lastRuneForTesting; r++ {
		if isBreak := !notBreak[r]; isBreak != inBreak {
			if isBreak {
				// A range of break runes starts at r.
				fmt.Fprintf(w, "\t\t{0x%x, ", r)
			} else {
				// The open range ended at the previous rune.
				fmt.Fprintf(w, "0x%x},\n", r-1)
			}
			inBreak = isBreak
		}
	}
	if inBreak {
		// Close the range that is still open at the last tested rune.
		fmt.Fprintf(w, "0x%x},\n", lastRuneForTesting)
	}
	fmt.Fprint(w, "\t}\n\n")

	// Word break test
	// Filter out all samples that do not contain cased characters.
	cased := map[rune]bool{}
	parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
		if p.String(1) == "Cased" {
			cased[p.Rune(0)] = true
		}
	})

	fmt.Fprintln(w, "\tbreakTest = []string{")
	parse("auxiliary/WordBreakTest.txt", func(p *ucd.Parser) {
		// The first field is consumed as pairs of a marker and a hexadecimal
		// code point.
		c := strings.Split(p.String(0), " ")

		const sep = '|'
		numCased := 0
		test := ""
		for ; len(c) >= 2; c = c[2:] {
			// A "÷" marker becomes a separator, except at the very start of
			// the sample.
			if c[0] == "÷" && test != "" {
				test += string(sep)
			}
			i, err := strconv.ParseUint(c[1], 16, 32)
			r := rune(i)
			if err != nil {
				log.Fatalf("Invalid rune %q.", c[1])
			}
			if r == sep {
				log.Fatalf("Separator %q not allowed in test data. Pick another one.", sep)
			}
			if cased[r] {
				numCased++
			}
			test += string(r)
		}
		// Only samples with more than one cased rune are written.
		if numCased > 1 {
			fmt.Fprintf(w, "\t\t%q,\n", test)
		}
	})
	fmt.Fprintln(w, "\t}")

	fmt.Fprintln(w, ")")

	gen.WriteGoFile("tables_test.go", "cases", w.Bytes())
}
|
||||
|
||||
// These functions are only used to verify that their definitions have not
|
||||
// changed in the Unicode Standard.
|
||||
|
||||
func verifyCased(r rune) bool {
|
||||
return verifyLower(r) || verifyUpper(r) || unicode.IsTitle(r)
|
||||
}
|
||||
|
||||
// verifyLower reports whether r is a lower case letter or has the
// Other_Lowercase property.
func verifyLower(r rune) bool {
	if unicode.IsLower(r) {
		return true
	}
	return unicode.Is(unicode.Other_Lowercase, r)
}
|
||||
|
||||
// verifyUpper reports whether r is an upper case letter or has the
// Other_Uppercase property.
func verifyUpper(r rune) bool {
	if unicode.IsUpper(r) {
		return true
	}
	return unicode.Is(unicode.Other_Uppercase, r)
}
|
||||
|
||||
// verifyIgnore approximates the Case_Ignorable property with the tables of
// the core unicode package: it reports whether r is in one of the categories
// Mn, Me, Cf, Lm or Sk. It is used to reduce the size of the test data.
func verifyIgnore(r rune) bool {
	return unicode.In(r,
		unicode.Mn,
		unicode.Me,
		unicode.Cf,
		unicode.Lm,
		unicode.Sk,
	)
}
|
||||
|
||||
// printProperties prints tables of rune properties from the given UCD file.
|
||||
// A filter func f can be given to exclude certain values. A rune r will have
|
||||
// the indicated property if it is in the generated table or if f(r).
|
||||
func printProperties(w io.Writer, file, property string, f func(r rune) bool) int {
|
||||
verify := map[rune]bool{}
|
||||
n := 0
|
||||
varNameParts := strings.Split(property, "_")
|
||||
varNameParts[0] = strings.ToLower(varNameParts[0])
|
||||
fmt.Fprintf(w, "\t%s = map[rune]bool{\n", strings.Join(varNameParts, ""))
|
||||
parse(file, func(p *ucd.Parser) {
|
||||
if p.String(1) == property {
|
||||
r := p.Rune(0)
|
||||
verify[r] = true
|
||||
if !f(r) {
|
||||
n++
|
||||
fmt.Fprintf(w, "\t\t0x%.4x: true,\n", r)
|
||||
}
|
||||
}
|
||||
})
|
||||
fmt.Fprint(w, "\t}\n\n")
|
||||
|
||||
// Verify that f is correct, that is, it represents a subset of the property.
|
||||
for r := rune(0); r <= lastRuneForTesting; r++ {
|
||||
if !verify[r] && f(r) {
|
||||
log.Fatalf("Incorrect filter func for property %q.", property)
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// The newCaseTrie, sparseValues and sparseOffsets definitions below are
|
||||
// placeholders referred to by gen_trieval.go. The real definitions are
|
||||
// generated by this program and written to tables.go.
|
||||
|
||||
// newCaseTrie is a stand-in that ignores its argument and returns 0.
func newCaseTrie(int) int { return 0 }

// Zero-length stand-ins for the generated tables.
var (
	sparseValues  [0]valueRange
	sparseOffsets [0]uint16
)
|
219
vendor/golang.org/x/text/cases/gen_trieval.go
generated
vendored
219
vendor/golang.org/x/text/cases/gen_trieval.go
generated
vendored
|
@ -1,219 +0,0 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This file contains definitions for interpreting the trie value of the case
|
||||
// trie generated by "go run gen*.go". It is shared by both the generator
|
||||
// program and the resultant package. Sharing is achieved by the generator
|
||||
// copying gen_trieval.go to trieval.go and changing what's above this comment.
|
||||
|
||||
// info holds case information for a single rune. It is the value returned
|
||||
// by a trie lookup. Most mapping information can be stored in a single 16-bit
|
||||
// value. If not, for example when a rune is mapped to multiple runes, the value
|
||||
// stores some basic case data and an index into an array with additional data.
|
||||
//
|
||||
// The per-rune values have the following format:
|
||||
//
|
||||
// if (exception) {
|
||||
// 15..5 unsigned exception index
|
||||
// 4 unused
|
||||
// } else {
|
||||
// 15..8 XOR pattern or index to XOR pattern for case mapping
|
||||
// Only 13..8 are used for XOR patterns.
|
||||
// 7 inverseFold (fold to upper, not to lower)
|
||||
// 6 index: interpret the XOR pattern as an index
|
||||
// or isMid if case mode is cIgnorableUncased.
|
||||
// 5..4 CCC: zero (normal or break), above or other
|
||||
// }
|
||||
// 3 exception: interpret this value as an exception index
|
||||
// (TODO: is this bit necessary? Probably implied from case mode.)
|
||||
// 2..0 case mode
|
||||
//
|
||||
// For the non-exceptional cases, a rune must be either uncased, lowercase or
|
||||
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
|
||||
// rune to uppercase or an uppercase rune to lowercase (applied to the 10
|
||||
// least-significant bits of the rune).
|
||||
//
|
||||
// See the definitions below for a more detailed description of the various
|
||||
// bits.
|
||||
type info uint16 // packed 16-bit trie value; the bit layout is given above
|
||||
|
||||
const (
	casedMask      = 0x0003 // the two low bits of the case mode
	fullCasedMask  = 0x0007 // bits 2..0: the complete case mode
	ignorableMask  = 0x0006 // bits 2..1 of the case mode
	ignorableValue = 0x0004 // bits 2..1 of cIgnorableUncased and cIgnorableCased

	inverseFoldBit = 1 << 7 // bit 7: fold to upper, not to lower
	isMidBit       = 1 << 6 // bit 6: isMid if the case mode is cIgnorableUncased

	exceptionBit     = 1 << 3 // bit 3: interpret the value as an exception index
	exceptionShift   = 5      // the exception index is stored in bits 15..5
	numExceptionBits = 11     // width of the exception index

	xorIndexBit = 1 << 6 // bit 6: interpret the XOR pattern as an index
	xorShift    = 8      // the XOR pattern is stored in bits 15..8

	// There is no mapping if all xor bits and the exception bit are zero.
	hasMappingMask = 0xff80 | exceptionBit
)
|
||||
|
||||
// The case mode bits encodes the case type of a rune. This includes uncased,
|
||||
// title, upper and lower case and case ignorable. (For a definition of these
|
||||
// terms see Chapter 3 of The Unicode Standard Core Specification.) In some rare
|
||||
// cases, a rune can be both cased and case-ignorable. This is encoded by
|
||||
// cIgnorableCased. A rune of this type is always lower case. Some runes are
|
||||
// cased while not having a mapping.
|
||||
//
|
||||
// A common pattern for scripts in the Unicode standard is for upper and lower
|
||||
// case runes to alternate for increasing rune values (e.g. the accented Latin
|
||||
// ranges starting from U+0100 and U+1E00 among others and some Cyrillic
|
||||
// characters). We use this property by defining a cXORCase mode, where the case
|
||||
// mode (always upper or lower case) is derived from the rune value. As the XOR
|
||||
// pattern for case mappings is often identical for successive runes, using
|
||||
// cXORCase can result in large series of identical trie values. This, in turn,
|
||||
// allows us to better compress the trie blocks.
|
||||
const (
	cUncased          info = iota // 000
	cTitle                        // 001
	cLower                        // 010
	cUpper                        // 011
	cIgnorableUncased             // 100
	cIgnorableCased               // 101 // lower case if mappings exist
	cXORCase                      // 11x // case is cLower | ((rune&1) ^ x)

	// maxCaseMode is the largest of the case modes cUncased, cTitle, cLower
	// and cUpper.
	maxCaseMode = cUpper
)
|
||||
|
||||
func (c info) isCased() bool {
|
||||
return c&casedMask != 0
|
||||
}
|
||||
|
||||
func (c info) isCaseIgnorable() bool {
|
||||
return c&ignorableMask == ignorableValue
|
||||
}
|
||||
|
||||
func (c info) isNotCasedAndNotCaseIgnorable() bool {
|
||||
return c&fullCasedMask == 0
|
||||
}
|
||||
|
||||
func (c info) isCaseIgnorableAndNotCased() bool {
|
||||
return c&fullCasedMask == cIgnorableUncased
|
||||
}
|
||||
|
||||
func (c info) isMid() bool {
|
||||
return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased
|
||||
}
|
||||
|
||||
// The case mapping implementation will need to know about various Canonical
|
||||
// Combining Class (CCC) values. We encode two of these in the trie value:
|
||||
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that
|
||||
// CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that
|
||||
// the rune also has the break category Break (see below).
|
||||
const (
	cccBreak info = iota << 4 // 0x00
	cccZero                   // 0x10
	cccAbove                  // 0x20
	cccOther                  // 0x30

	// cccMask selects bits 5..4, which hold the CCC type.
	cccMask = cccBreak | cccZero | cccAbove | cccOther
)
|
||||
|
||||
// Canonical Combining Class values that are referred to by the generator.
const (
	starter       = 0
	above         = 230 // the CCC that is encoded as cccAbove
	iotaSubscript = 240 // CCC Iota_Subscript
)
|
||||
|
||||
// The exceptions slice holds data that does not fit in a normal info entry.
|
||||
// The entry is pointed to by the exception index in an entry. It has the
|
||||
// following format:
|
||||
//
|
||||
// Header
|
||||
// byte 0:
|
||||
// 7..6 unused
|
||||
// 5..4 CCC type (same bits as entry)
|
||||
// 3 unused
|
||||
// 2..0 length of fold
|
||||
//
|
||||
// byte 1:
|
||||
// 7..6 unused
|
||||
// 5..3 length of 1st mapping of case type
|
||||
// 2..0 length of 2nd mapping of case type
|
||||
//
|
||||
// case 1st 2nd
|
||||
// lower -> upper, title
|
||||
// upper -> lower, title
|
||||
// title -> lower, upper
|
||||
//
|
||||
// A length of 0 (noChange) indicates that the mapping is not present, which
|
||||
// implies no change. Mappings to a zero-length string are not stored here.
|
||||
//
|
||||
// Body bytes:
|
||||
// case folding bytes
|
||||
// lowercase mapping bytes
|
||||
// uppercase mapping bytes
|
||||
// titlecase mapping bytes
|
||||
// closure mapping bytes (for NFKC_Casefold). (TODO)
|
||||
//
|
||||
// Fallbacks:
|
||||
// missing fold -> lower
|
||||
// missing title -> upper
|
||||
// all missing -> original rune
|
||||
//
|
||||
// exceptions starts with a dummy byte to enforce that there is no zero index
|
||||
// value.
|
||||
const (
	lengthMask = 0x07 // mask for one 3-bit length field of the header
	lengthBits = 3    // width of one length field
	noChange   = 0    // length value of a mapping that is not present
)
|
||||
|
||||
// References to generated trie.
|
||||
|
||||
// trie is the value returned by newCaseTrie(0).
var trie = newCaseTrie(0)

// sparse gives the sparse block lookup code access to the sparseValues and
// sparseOffsets tables.
var sparse = sparseBlocks{
	values:  sparseValues[:],
	offsets: sparseOffsets[:],
}
|
||||
|
||||
// Sparse block lookup code.
|
||||
|
||||
// valueRange is an entry in a sparse block. It assigns value to all bytes b
// with lo <= b <= hi.
type valueRange struct {
	value  uint16
	lo, hi byte // inclusive bounds of the range
}
|
||||
|
||||
// sparseBlocks is a collection of sparse blocks. The entries of block n are
// values[offsets[n]:offsets[n+1]].
type sparseBlocks struct {
	values  []valueRange
	offsets []uint16
}
|
||||
|
||||
// lookup returns the value from values block n for byte b using binary search.
|
||||
func (s *sparseBlocks) lookup(n uint32, b byte) uint16 {
|
||||
lo := s.offsets[n]
|
||||
hi := s.offsets[n+1]
|
||||
for lo < hi {
|
||||
m := lo + (hi-lo)/2
|
||||
r := s.values[m]
|
||||
if r.lo <= b && b <= r.hi {
|
||||
return r.value
|
||||
}
|
||||
if b < r.lo {
|
||||
hi = m
|
||||
} else {
|
||||
lo = m + 1
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// lastRuneForTesting is the last rune used for testing. Everything after this
// is boring. It is the inclusive upper bound of the rune loops that use it.
const lastRuneForTesting = rune(0x1FFFF)
|
61
vendor/golang.org/x/text/cases/icu.go
generated
vendored
61
vendor/golang.org/x/text/cases/icu.go
generated
vendored
|
@ -1,61 +0,0 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build icu
|
||||
|
||||
package cases
|
||||
|
||||
// Ideally these functions would be defined in a test file, but go test doesn't
|
||||
// allow CGO in tests. The build tag should ensure either way that these
|
||||
// functions will not end up in the package.
|
||||
|
||||
// TODO: Ensure that the correct ICU version is set.
|
||||
|
||||
/*
|
||||
#cgo LDFLAGS: -licui18n.57 -licuuc.57
|
||||
#include <stdlib.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/utypes.h>
|
||||
#include <unicode/localpointer.h>
|
||||
#include <unicode/ucasemap.h>
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import "unsafe"
|
||||
|
||||
func doICU(tag, caser, input string) string {
|
||||
err := C.UErrorCode(0)
|
||||
loc := C.CString(tag)
|
||||
cm := C.ucasemap_open(loc, C.uint32_t(0), &err)
|
||||
|
||||
buf := make([]byte, len(input)*4)
|
||||
dst := (*C.char)(unsafe.Pointer(&buf[0]))
|
||||
src := C.CString(input)
|
||||
|
||||
cn := C.int32_t(0)
|
||||
|
||||
switch caser {
|
||||
case "fold":
|
||||
cn = C.ucasemap_utf8FoldCase(cm,
|
||||
dst, C.int32_t(len(buf)),
|
||||
src, C.int32_t(len(input)),
|
||||
&err)
|
||||
case "lower":
|
||||
cn = C.ucasemap_utf8ToLower(cm,
|
||||
dst, C.int32_t(len(buf)),
|
||||
src, C.int32_t(len(input)),
|
||||
&err)
|
||||
case "upper":
|
||||
cn = C.ucasemap_utf8ToUpper(cm,
|
||||
dst, C.int32_t(len(buf)),
|
||||
src, C.int32_t(len(input)),
|
||||
&err)
|
||||
case "title":
|
||||
cn = C.ucasemap_utf8ToTitle(cm,
|
||||
dst, C.int32_t(len(buf)),
|
||||
src, C.int32_t(len(input)),
|
||||
&err)
|
||||
}
|
||||
return string(buf[:cn])
|
||||
}
|
82
vendor/golang.org/x/text/cases/info.go
generated
vendored
82
vendor/golang.org/x/text/cases/info.go
generated
vendored
|
@ -1,82 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cases
|
||||
|
||||
func (c info) cccVal() info {
|
||||
if c&exceptionBit != 0 {
|
||||
return info(exceptions[c>>exceptionShift]) & cccMask
|
||||
}
|
||||
return c & cccMask
|
||||
}
|
||||
|
||||
func (c info) cccType() info {
|
||||
ccc := c.cccVal()
|
||||
if ccc <= cccZero {
|
||||
return cccZero
|
||||
}
|
||||
return ccc
|
||||
}
|
||||
|
||||
// TODO: Implement full Unicode breaking algorithm:
|
||||
// 1) Implement breaking in separate package.
|
||||
// 2) Use the breaker here.
|
||||
// 3) Compare table size and performance of using the more generic breaker.
|
||||
//
|
||||
// Note that we can extend the current algorithm to be much more accurate. This
|
||||
// only makes sense, though, if the performance and/or space penalty of using
|
||||
// the generic breaker is big. Extra data will only be needed for non-cased
|
||||
// runes, which means there are sufficient bits left in the caseType.
|
||||
// ICU prohibits breaking in such cases as well.
|
||||
|
||||
// For the purpose of title casing we use an approximation of the Unicode Word
|
||||
// Breaking algorithm defined in Annex #29:
|
||||
// http://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table.
|
||||
//
|
||||
// For our approximation, we group the Word Break types into the following
|
||||
// categories, with associated rules:
|
||||
//
|
||||
// 1) Letter:
|
||||
// ALetter, Hebrew_Letter, Numeric, ExtendNumLet, Extend, Format_FE, ZWJ.
|
||||
// Rule: Never break between consecutive runes of this category.
|
||||
//
|
||||
// 2) Mid:
|
||||
// MidLetter, MidNumLet, Single_Quote.
|
||||
// (Cf. case-ignorable: MidLetter, MidNumLet, Single_Quote or cat is Mn,
|
||||
// Me, Cf, Lm or Sk).
|
||||
// Rule: Don't break between Letter and Mid, but break between two Mids.
|
||||
//
|
||||
// 3) Break:
|
||||
// Any other category: NewLine, MidNum, CR, LF, Double_Quote, Katakana, and
|
||||
// Other.
|
||||
// These categories should always result in a break between two cased letters.
|
||||
// Rule: Always break.
|
||||
//
|
||||
// Note 1: the Katakana and MidNum categories can, in esoteric cases, result in
|
||||
// preventing a break between two cased letters. For now we will ignore this
|
||||
// (e.g. [ALetter] [ExtendNumLet] [Katakana] [ExtendNumLet] [ALetter] and
|
||||
// [ALetter] [Numeric] [MidNum] [Numeric] [ALetter].)
|
||||
//
|
||||
// Note 2: the rule for Mid is very approximate, but works in most cases. To
|
||||
// improve, we could store the categories in the trie value and use a FA to
|
||||
// manage breaks. See TODO comment above.
|
||||
//
|
||||
// Note 3: according to the spec, it is possible for the Extend category to
|
||||
// introduce breaks between other categories grouped in Letter. However, this
|
||||
// is undesirable for our purposes. ICU prevents breaks in such cases as well.
|
||||
|
||||
// isBreak returns whether this rune should introduce a break.
|
||||
func (c info) isBreak() bool {
|
||||
return c.cccVal() == cccBreak
|
||||
}
|
||||
|
||||
// isLetter returns whether the rune is of break type ALetter, Hebrew_Letter,
|
||||
// Numeric, ExtendNumLet, or Extend.
|
||||
func (c info) isLetter() bool {
|
||||
ccc := c.cccVal()
|
||||
if ccc == cccZero {
|
||||
return !c.isCaseIgnorable()
|
||||
}
|
||||
return ccc != cccBreak
|
||||
}
|
816
vendor/golang.org/x/text/cases/map.go
generated
vendored
816
vendor/golang.org/x/text/cases/map.go
generated
vendored
|
@ -1,816 +0,0 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cases
|
||||
|
||||
// This file contains the definitions of case mappings for all supported
|
||||
// languages. The rules for the language-specific tailorings were taken and
|
||||
// modified from the CLDR transform definitions in common/transforms.
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/transform"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// A mapFunc takes a context set to the current rune and writes the mapped
// version to the same context. It may advance the context to the next rune. It
// returns whether a checkpoint is possible: whether the pDst bytes written to
// dst so far won't need changing as we see more source bytes.
type mapFunc func(*context) bool

// A spanFunc takes a context set to the current rune and returns whether this
// rune would be altered when written to the output. It may advance the context
// to the next rune. It returns whether a checkpoint is possible.
type spanFunc func(*context) bool

// maxIgnorable defines the maximum number of ignorables to consider for
// lookahead operations.
const maxIgnorable = 30

// supported lists the language tags for which we have tailorings. The tags
// are separated by single spaces. The entries of upperFunc, lowerFunc and
// titleInfos are annotated with these tags, in this order.
const supported = "und af az el lt nl tr"
|
||||
|
||||
func init() {
|
||||
tags := []language.Tag{}
|
||||
for _, s := range strings.Split(supported, " ") {
|
||||
tags = append(tags, language.MustParse(s))
|
||||
}
|
||||
matcher = internal.NewInheritanceMatcher(tags)
|
||||
Supported = language.NewCoverage(tags)
|
||||
}
|
||||
|
||||
var (
	// matcher maps a language tag to an index into the tables below. It is
	// set by init from the supported tags.
	matcher *internal.InheritanceMatcher

	// Supported is the coverage of the supported tags. It is set by init.
	Supported language.Coverage

	// We keep the following lists separate, instead of having a single per-
	// language struct, to give the compiler a chance to remove unused code.

	// Some uppercase mappers are stateless, so we can precompute the
	// Transformers and save a bit on runtime allocations.
	upperFunc = []struct {
		upper mapFunc
		span  spanFunc
	}{
		{nil, nil},                  // und
		{nil, nil},                  // af
		{aztrUpper(upper), isUpper}, // az
		{elUpper, noSpan},           // el
		{ltUpper(upper), noSpan},    // lt
		{nil, nil},                  // nl
		{aztrUpper(upper), isUpper}, // tr
	}

	// The transformers that are used if a language has no tailoring.
	undUpper            transform.SpanningTransformer = &undUpperCaser{}
	undLower            transform.SpanningTransformer = &undLowerCaser{}
	undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}

	// lowerFunc holds the lower case mapper per language. A nil entry makes
	// makeLower fall back to undLower or undLowerIgnoreSigma.
	lowerFunc = []mapFunc{
		nil,       // und
		nil,       // af
		aztrLower, // az
		nil,       // el
		ltLower,   // lt
		nil,       // nl
		aztrLower, // tr
	}

	// titleInfos holds the functions used by makeTitle per language.
	titleInfos = []struct {
		title     mapFunc
		lower     mapFunc
		titleSpan spanFunc
		rewrite   func(*context)
	}{
		{title, lower, isTitle, nil},                // und
		{title, lower, isTitle, afnlRewrite},        // af
		{aztrUpper(title), aztrLower, isTitle, nil}, // az
		{title, lower, isTitle, nil},                // el
		{ltUpper(title), ltLower, noSpan, nil},      // lt
		{nlTitle, lower, nlTitleSpan, afnlRewrite},  // nl
		{aztrUpper(title), aztrLower, isTitle, nil}, // tr
	}
)
|
||||
|
||||
func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
|
||||
_, i, _ := matcher.Match(t)
|
||||
f := upperFunc[i].upper
|
||||
if f == nil {
|
||||
return undUpper
|
||||
}
|
||||
return &simpleCaser{f: f, span: upperFunc[i].span}
|
||||
}
|
||||
|
||||
func makeLower(t language.Tag, o options) transform.SpanningTransformer {
|
||||
_, i, _ := matcher.Match(t)
|
||||
f := lowerFunc[i]
|
||||
if f == nil {
|
||||
if o.ignoreFinalSigma {
|
||||
return undLowerIgnoreSigma
|
||||
}
|
||||
return undLower
|
||||
}
|
||||
if o.ignoreFinalSigma {
|
||||
return &simpleCaser{f: f, span: isLower}
|
||||
}
|
||||
return &lowerCaser{
|
||||
first: f,
|
||||
midWord: finalSigma(f),
|
||||
}
|
||||
}
|
||||
|
||||
func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
|
||||
_, i, _ := matcher.Match(t)
|
||||
x := &titleInfos[i]
|
||||
lower := x.lower
|
||||
if o.noLower {
|
||||
lower = (*context).copy
|
||||
} else if !o.ignoreFinalSigma {
|
||||
lower = finalSigma(lower)
|
||||
}
|
||||
return &titleCaser{
|
||||
title: x.title,
|
||||
lower: lower,
|
||||
titleSpan: x.titleSpan,
|
||||
rewrite: x.rewrite,
|
||||
}
|
||||
}
|
||||
|
||||
// noSpan is the spanFunc used in the tables for mappers that have no Span
// implementation. It records transform.ErrEndOfSpan on c and returns false,
// so a span loop using it stops at the first rune it is asked about.
func noSpan(c *context) bool {
	c.err = transform.ErrEndOfSpan
	return false
}
|
||||
|
||||
// TODO: consider a similar special case for the fast majority lower case. This
|
||||
// is a bit more involved so will require some more precise benchmarking to
|
||||
// justify it.
|
||||
|
||||
type undUpperCaser struct{ transform.NopResetter }
|
||||
|
||||
// undUpperCaser implements the Transformer interface for doing an upper case
|
||||
// mapping for the root locale (und). It eliminates the need for an allocation
|
||||
// as it prevents escaping by not using function pointers.
|
||||
func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
for c.next() {
|
||||
upper(&c)
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && isUpper(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
// undLowerIgnoreSigmaCaser implements the Transformer interface for doing
|
||||
// a lower case mapping for the root locale (und) ignoring final sigma
|
||||
// handling. This casing algorithm is used in some performance-critical packages
|
||||
// like secure/precis and x/net/http/idna, which warrants its special-casing.
|
||||
type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }
|
||||
|
||||
func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
for c.next() && lower(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
|
||||
}
|
||||
|
||||
// Span implements a generic lower-casing. This is possible as isLower works
|
||||
// for all lowercasing variants. All lowercase variants only vary in how they
|
||||
// transform a non-lowercase letter. They will never change an already lowercase
|
||||
// letter. In addition, there is no state.
|
||||
func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && isLower(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
type simpleCaser struct {
|
||||
context
|
||||
f mapFunc
|
||||
span spanFunc
|
||||
}
|
||||
|
||||
// simpleCaser implements the Transformer interface for doing a case operation
|
||||
// on a rune-by-rune basis.
|
||||
func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
for c.next() && t.f(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && t.span(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
// undLowerCaser implements the Transformer interface for doing a lower case
|
||||
// mapping for the root locale (und) ignoring final sigma handling. This casing
|
||||
// algorithm is used in some performance-critical packages like secure/precis
|
||||
// and x/net/http/idna, which warrants its special-casing.
|
||||
type undLowerCaser struct{ transform.NopResetter }
|
||||
|
||||
func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
|
||||
for isInterWord := true; c.next(); {
|
||||
if isInterWord {
|
||||
if c.info.isCased() {
|
||||
if !lower(&c) {
|
||||
break
|
||||
}
|
||||
isInterWord = false
|
||||
} else if !c.copy() {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
if c.info.isNotCasedAndNotCaseIgnorable() {
|
||||
if !c.copy() {
|
||||
break
|
||||
}
|
||||
isInterWord = true
|
||||
} else if !c.hasPrefix("Σ") {
|
||||
if !lower(&c) {
|
||||
break
|
||||
}
|
||||
} else if !finalSigmaBody(&c) {
|
||||
break
|
||||
}
|
||||
}
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && isLower(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
// lowerCaser implements the Transformer interface. The default Unicode lower
|
||||
// casing requires different treatment for the first and subsequent characters
|
||||
// of a word, most notably to handle the Greek final Sigma.
|
||||
type lowerCaser struct {
|
||||
undLowerIgnoreSigmaCaser
|
||||
|
||||
context
|
||||
|
||||
first, midWord mapFunc
|
||||
}
|
||||
|
||||
func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
t.context = context{dst: dst, src: src, atEOF: atEOF}
|
||||
c := &t.context
|
||||
|
||||
for isInterWord := true; c.next(); {
|
||||
if isInterWord {
|
||||
if c.info.isCased() {
|
||||
if !t.first(c) {
|
||||
break
|
||||
}
|
||||
isInterWord = false
|
||||
} else if !c.copy() {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
if c.info.isNotCasedAndNotCaseIgnorable() {
|
||||
if !c.copy() {
|
||||
break
|
||||
}
|
||||
isInterWord = true
|
||||
} else if !t.midWord(c) {
|
||||
break
|
||||
}
|
||||
}
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
// titleCaser implements the Transformer interface. Title casing algorithms
// distinguish between the first letter of a word and subsequent letters of the
// same word. It uses state to avoid requiring a potentially infinite lookahead.
type titleCaser struct {
	// context is kept between calls: Transform and Span seed the new context
	// with the isMidWord value left by the previous call.
	context

	// rune mappings used by the actual casing algorithms.
	title     mapFunc  // applied to the first cased rune of a word
	lower     mapFunc  // applied to cased runes in the rest of the word
	titleSpan spanFunc // Span counterpart of title

	// rewrite, if non-nil, is called for every rune before it is processed.
	rewrite func(*context)
}
|
||||
|
||||
// Transform implements the standard Unicode title case algorithm as defined in
// Chapter 3 of The Unicode Standard:
//
//	toTitlecase(X): Find the word boundaries in X according to Unicode Standard
//	Annex #29, "Unicode Text Segmentation." For each word boundary, find the
//	first cased character F following the word boundary. If F exists, map F to
//	Titlecase_Mapping(F); then map all characters C between F and the following
//	word boundary to Lowercase_Mapping(C).
func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	// Carry isMidWord over from the previous call.
	t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
	c := &t.context

	if !c.next() {
		return c.ret()
	}

	for {
		// Take the rune info before rewrite and the mappers get to touch c.
		p := c.info
		if t.rewrite != nil {
			t.rewrite(c)
		}

		wasMid := p.isMid()
		// Break out of this loop on failure to ensure we do not modify the
		// state incorrectly.
		if p.isCased() {
			if !c.isMidWord {
				if !t.title(c) {
					break
				}
				c.isMidWord = true
			} else if !t.lower(c) {
				break
			}
		} else if !c.copy() {
			break
		} else if p.isBreak() {
			c.isMidWord = false
		}

		// As we save the state of the transformer, it is safe to call
		// checkpoint after any successful write.
		// No checkpoint is taken right after a mid rune while still mid-word.
		if !(c.isMidWord && wasMid) {
			c.checkpoint()
		}

		if !c.next() {
			break
		}
		// Two mid runes in a row end the word.
		if wasMid && c.info.isMid() {
			c.isMidWord = false
		}
	}
	return c.ret()
}
|
||||
|
||||
// Span walks src with the same word state machine as Transform, but instead
// of writing it only checks each cased rune: titleSpan for the first cased
// rune of a word and isLower for the rest. It stops at the first rune that
// fails its check.
func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
	// Carry isMidWord over from the previous call.
	t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
	c := &t.context

	if !c.next() {
		return c.retSpan()
	}

	for {
		// Take the rune info before rewrite and the checks get to touch c.
		p := c.info
		if t.rewrite != nil {
			t.rewrite(c)
		}

		wasMid := p.isMid()
		// Break out of this loop on failure to ensure we do not modify the
		// state incorrectly.
		if p.isCased() {
			if !c.isMidWord {
				if !t.titleSpan(c) {
					break
				}
				c.isMidWord = true
			} else if !isLower(c) {
				break
			}
		} else if p.isBreak() {
			c.isMidWord = false
		}
		// As we save the state of the transformer, it is safe to call
		// checkpoint after any successful check.
		// No checkpoint is taken right after a mid rune while still mid-word.
		if !(c.isMidWord && wasMid) {
			c.checkpoint()
		}

		if !c.next() {
			break
		}
		// Two mid runes in a row end the word.
		if wasMid && c.info.isMid() {
			c.isMidWord = false
		}
	}
	return c.retSpan()
}
|
||||
|
||||
// finalSigma adds Greek final Sigma handing to another casing function. It
|
||||
// determines whether a lowercased sigma should be σ or ς, by looking ahead for
|
||||
// case-ignorables and a cased letters.
|
||||
func finalSigma(f mapFunc) mapFunc {
|
||||
return func(c *context) bool {
|
||||
if !c.hasPrefix("Σ") {
|
||||
return f(c)
|
||||
}
|
||||
return finalSigmaBody(c)
|
||||
}
|
||||
}
|
||||
|
||||
// finalSigmaBody lower-cases a Σ. It first writes the final form ς and then
// looks ahead over case-ignorable runes, copying them; if the first rune that
// is not case-ignorable is cased, the written ς is patched into σ. It returns
// false when c.next fails during the lookahead.
func finalSigmaBody(c *context) bool {
	// Current rune must be Σ (U+03A3).

	// ::NFD();
	// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
	// Σ } [:case-ignorable:]* [:cased:] → σ;
	// [:cased:] [:case-ignorable:]* { Σ → ς;
	// ::Any-Lower;
	// ::NFC();

	// Remember where ς is written so it can be patched below.
	p := c.pDst
	c.writeString("ς")

	// TODO: we should do this here, but right now this will never have an
	// effect as this is called when the prefix is Sigma, whereas Dutch and
	// Afrikaans only test for an apostrophe.
	//
	// if t.rewrite != nil {
	// 	t.rewrite(c)
	// }

	// We need to do one more iteration after maxIgnorable, as a cased
	// letter is not an ignorable and may modify the result.
	wasMid := false
	for i := 0; i < maxIgnorable+1; i++ {
		if !c.next() {
			return false
		}
		if !c.info.isCaseIgnorable() {
			// All Midword runes are also case ignorable, so we are
			// guaranteed to have a letter or word break here. As we are
			// unreading the rune, there is no need to unset c.isMidWord;
			// the title caser will handle this.
			if c.info.isCased() {
				// p+1 is guaranteed to be in bounds: if writing ς was
				// successful, p+1 will contain the second byte of ς. If not,
				// this function will have returned after c.next returned false.
				c.dst[p+1]++ // ς → σ
			}
			c.unreadRune()
			return true
		}
		// A case ignorable may also introduce a word break, so we may need
		// to continue searching even after detecting a break.
		isMid := c.info.isMid()
		if (wasMid && isMid) || c.info.isBreak() {
			c.isMidWord = false
		}
		wasMid = isMid
		c.copy()
	}
	return true
}
|
||||
|
||||
// finalSigmaSpan would be the same as isLower.
|
||||
|
||||
// elUpper implements Greek upper casing, which entails removing a predefined
// set of non-blocked modifiers. Note that these accents should not be removed
// for title casing!
// Example: "Οδός" -> "ΟΔΟΣ".
//
// The rune is first upper cased with upper. Only if the source rune is Greek
// does elUpper go on to strip modifiers: those in the decomposition of the
// upper-cased rune, and the listed combining marks that follow in the source.
func elUpper(c *context) bool {
	// From CLDR:
	// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;
	// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;

	r, _ := utf8.DecodeRune(c.src[c.pSrc:])
	oldPDst := c.pDst
	if !upper(c) {
		return false
	}
	if !unicode.Is(unicode.Greek, r) {
		return true
	}
	i := 0
	// Take the properties of the uppercased rune that is already written to the
	// destination. This saves us the trouble of having to uppercase the
	// decomposed rune again.
	if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
		// Restore the destination position and process the decomposed rune.
		r, sz := utf8.DecodeRune(b)
		if r <= 0xFF { // See A.6.1
			return true
		}
		c.pDst = oldPDst
		// Insert the first rune and ignore the modifiers. See A.6.2.
		c.writeBytes(b[:sz])
		i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.
	}

	// Skip or copy the modifiers that follow in the source; i already counts
	// the modifiers dropped from the decomposition above.
	for ; i < maxIgnorable && c.next(); i++ {
		switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {
		// Above and Iota Subscript
		case 0x0300, // U+0300 COMBINING GRAVE ACCENT
			0x0301, // U+0301 COMBINING ACUTE ACCENT
			0x0304, // U+0304 COMBINING MACRON
			0x0306, // U+0306 COMBINING BREVE
			0x0308, // U+0308 COMBINING DIAERESIS
			0x0313, // U+0313 COMBINING COMMA ABOVE
			0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
			0x0342, // U+0342 COMBINING GREEK PERISPOMENI
			0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
			// No-op. Gobble the modifier.

		default:
			switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
			case cccZero:
				// Not a modifier: give the rune back and stop.
				c.unreadRune()
				return true

			// We don't need to test for IotaSubscript as the only rune that
			// qualifies (U+0345) was already excluded in the switch statement
			// above. See A.4.

			case cccAbove:
				return c.copy()
			default:
				// Some other modifier. We're still allowed to gobble Greek
				// modifiers after this.
				c.copy()
			}
		}
	}
	return i == maxIgnorable
}
|
||||
|
||||
// TODO: implement elUpperSpan (low-priority: complex and infrequent).
|
||||
|
||||
// ltLower implements Lithuanian lower casing: when a capital I or J (also as
// the base of a decomposable rune such as Į, Ì, Í or Ĩ) carries an accent
// above, an explicit U+0307 COMBINING DOT ABOVE is inserted before that
// accent. All other runes are handled by lower.
func ltLower(c *context) bool {
	// From CLDR:
	// # Introduce an explicit dot above when lowercasing capital I's and J's
	// # whenever there are more accents above.
	// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
	// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
	// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
	// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
	// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
	// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
	// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
	// ::NFD();
	// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
	// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
	// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
	// I \u0300 (Ì) → i \u0307 \u0300;
	// I \u0301 (Í) → i \u0307 \u0301;
	// I \u0303 (Ĩ) → i \u0307 \u0303;
	// ::Any-Lower();
	// ::NFC();

	// i counts the modifiers seen so far after the base letter.
	i := 0
	if r := c.src[c.pSrc]; r < utf8.RuneSelf {
		// ASCII: only I and J need the lookahead below.
		lower(c)
		if r != 'I' && r != 'J' {
			return true
		}
	} else {
		p := norm.NFD.Properties(c.src[c.pSrc:])
		if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
			// UTF-8 optimization: the decomposition will only have an above
			// modifier if the last rune of the decomposition is in [U+300-U+311].
			// In all other cases, a decomposition starting with I is always
			// an I followed by modifiers that are not cased themselves. See A.2.
			if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.
				if !c.writeBytes(d[:1]) {
					return false
				}
				c.dst[c.pDst-1] += 'a' - 'A' // lower

				// Assumption: modifier never changes on lowercase. See A.1.
				// Assumption: all modifiers added have CCC = Above. See A.2.3.
				return c.writeString("\u0307") && c.writeBytes(d[1:])
			}
			// In all other cases the additional modifiers will have a CCC
			// that is less than 230 (Above). We will insert the U+0307, if
			// needed, after these modifiers so that a string in FCD form
			// will remain so. See A.2.2.
			lower(c)
			i = 1
		} else {
			return lower(c)
		}
	}

	// Look ahead over the following modifiers for one with CCC = Above.
	for ; i < maxIgnorable && c.next(); i++ {
		switch c.info.cccType() {
		case cccZero:
			// Not a modifier: give the rune back and stop.
			c.unreadRune()
			return true
		case cccAbove:
			return c.writeString("\u0307") && c.copy() // See A.1.
		default:
			c.copy() // See A.1.
		}
	}
	return i == maxIgnorable
}
|
||||
|
||||
// ltLowerSpan would be the same as isLower.
|
||||
|
||||
// ltUpper wraps the upper or title mapper f with the Lithuanian rule that
// drops a U+0307 COMBINING DOT ABOVE following a soft-dotted rune, possibly
// with other modifiers in between.
func ltUpper(f mapFunc) mapFunc {
	return func(c *context) bool {
		// Unicode:
		// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
		//
		// From CLDR:
		// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible
		// # intervening non-230 marks.
		// ::NFD();
		// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
		// ::Any-Upper();
		// ::NFC();

		// TODO: See A.5. A soft-dotted rune never has an exception. This would
		// allow us to overload the exception bit and encode this property in
		// info. Need to measure performance impact of this.
		r, _ := utf8.DecodeRune(c.src[c.pSrc:])
		oldPDst := c.pDst
		if !f(c) {
			return false
		}
		if !unicode.Is(unicode.Soft_Dotted, r) {
			return true
		}

		// We don't need to do an NFD normalization, as a soft-dotted rune never
		// contains U+0307. See A.3.

		i := 0
		for ; i < maxIgnorable && c.next(); i++ {
			switch c.info.cccType() {
			case cccZero:
				// Not a modifier: give the rune back and stop.
				c.unreadRune()
				return true
			case cccAbove:
				if c.hasPrefix("\u0307") {
					// We don't do a full NFC, but rather combine runes for
					// some of the common cases. (Returning NFC or
					// preserving normal form is neither a requirement nor
					// a possibility anyway).
					// Advancing here without copying is what drops U+0307.
					if !c.next() {
						return false
					}
					// Only a lone 'I' in dst followed by a two-byte mark
					// starting with 0xcc is combined.
					if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {
						s := ""
						switch c.src[c.pSrc+1] {
						case 0x80: // U+0300 COMBINING GRAVE ACCENT
							s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
						case 0x81: // U+0301 COMBINING ACUTE ACCENT
							s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
						case 0x83: // U+0303 COMBINING TILDE
							s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
						case 0x88: // U+0308 COMBINING DIAERESIS
							s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
						default:
						}
						if s != "" {
							// Overwrite the 'I' with the precomposed rune.
							c.pDst = oldPDst
							return c.writeString(s)
						}
					}
				}
				return c.copy()
			default:
				c.copy()
			}
		}
		return i == maxIgnorable
	}
}
|
||||
|
||||
// TODO: implement ltUpperSpan (low priority: complex and infrequent).
|
||||
|
||||
func aztrUpper(f mapFunc) mapFunc {
|
||||
return func(c *context) bool {
|
||||
// i→İ;
|
||||
if c.src[c.pSrc] == 'i' {
|
||||
return c.writeString("İ")
|
||||
}
|
||||
return f(c)
|
||||
}
|
||||
}
|
||||
|
||||
// aztrLower implements Turkish and Azeri lower casing: İ becomes i, an I that
// is followed (possibly after other modifiers) by U+0307 becomes i with the
// U+0307 dropped, and any other I becomes dotless ı. All other runes are
// handled by lower.
func aztrLower(c *context) (done bool) {
	// From CLDR:
	// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
	// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
	// İ→i;
	// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
	// # This matches the behavior of the canonically equivalent I-dot_above
	// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
	// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
	// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
	// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;
	// I→ı ;
	// ::Any-Lower();
	if c.hasPrefix("\u0130") { // İ
		return c.writeString("i")
	}
	if c.src[c.pSrc] != 'I' {
		return lower(c)
	}

	// We ignore the lower-case I for now, but insert it later when we know
	// which form we need.
	// start is the source offset just past the I.
	start := c.pSrc + c.sz

	i := 0
Loop:
	// We check for up to n ignorables before \u0307. As \u0307 is an
	// ignorable as well, n is maxIgnorable-1.
	for ; i < maxIgnorable && c.next(); i++ {
		switch c.info.cccType() {
		case cccAbove:
			if c.hasPrefix("\u0307") {
				return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307
			}
			done = true
			break Loop
		case cccZero:
			c.unreadRune()
			done = true
			break Loop
		default:
			// We'll write this rune after we know which starter to use.
		}
	}
	if i == maxIgnorable {
		done = true
	}
	// No U+0307 was found: write ı followed by the source bytes scanned so
	// far. done is still false here if c.next failed before a decision.
	return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
}
|
||||
|
||||
// aztrLowerSpan would be the same as isLower.
|
||||
|
||||
func nlTitle(c *context) bool {
|
||||
// From CLDR:
|
||||
// # Special titlecasing for Dutch initial "ij".
|
||||
// ::Any-Title();
|
||||
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
|
||||
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
|
||||
if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' {
|
||||
return title(c)
|
||||
}
|
||||
|
||||
if !c.writeString("I") || !c.next() {
|
||||
return false
|
||||
}
|
||||
if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' {
|
||||
return c.writeString("J")
|
||||
}
|
||||
c.unreadRune()
|
||||
return true
|
||||
}
|
||||
|
||||
func nlTitleSpan(c *context) bool {
|
||||
// From CLDR:
|
||||
// # Special titlecasing for Dutch initial "ij".
|
||||
// ::Any-Title();
|
||||
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
|
||||
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
|
||||
if c.src[c.pSrc] != 'I' {
|
||||
return isTitle(c)
|
||||
}
|
||||
if !c.next() || c.src[c.pSrc] == 'j' {
|
||||
return false
|
||||
}
|
||||
if c.src[c.pSrc] != 'J' {
|
||||
c.unreadRune()
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Not part of CLDR, but see http://unicode.org/cldr/trac/ticket/7078.
|
||||
func afnlRewrite(c *context) {
|
||||
if c.hasPrefix("'") || c.hasPrefix("’") {
|
||||
c.isMidWord = true
|
||||
}
|
||||
}
|
2211
vendor/golang.org/x/text/cases/tables.go
generated
vendored
2211
vendor/golang.org/x/text/cases/tables.go
generated
vendored
File diff suppressed because it is too large
Load diff
215
vendor/golang.org/x/text/cases/trieval.go
generated
vendored
215
vendor/golang.org/x/text/cases/trieval.go
generated
vendored
|
@ -1,215 +0,0 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package cases
|
||||
|
||||
// This file contains definitions for interpreting the trie value of the case
|
||||
// trie generated by "go run gen*.go". It is shared by both the generator
|
||||
// program and the resultant package. Sharing is achieved by the generator
|
||||
// copying gen_trieval.go to trieval.go and changing what's above this comment.
|
||||
|
||||
// info holds case information for a single rune. It is the value returned
// by a trie lookup. Most mapping information can be stored in a single 16-bit
// value. If not, for example when a rune is mapped to multiple runes, the value
// stores some basic case data and an index into an array with additional data.
//
// The per-rune values have the following format:
//
//	if (exception) {
//	  15..5  unsigned exception index
//	      4  unused
//	} else {
//	  15..8  XOR pattern or index to XOR pattern for case mapping
//	         Only 13..8 are used for XOR patterns.
//	      7  inverseFold (fold to upper, not to lower)
//	      6  index: interpret the XOR pattern as an index
//	         or isMid if case mode is cIgnorableUncased.
//	   5..4  CCC: zero (normal or break), above or other
//	}
//	   3  exception: interpret this value as an exception index
//	      (TODO: is this bit necessary? Probably implied from case mode.)
//	2..0  case mode
//
// For the non-exceptional cases, a rune must be either uncased, lowercase or
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
// rune to uppercase or an uppercase rune to lowercase (applied to the 10
// least-significant bits of the rune).
//
// See the definitions below for a more detailed description of the various
// bits.
type info uint16
|
||||
|
||||
// Masks and shifts for picking apart an info value.
const (
	casedMask      = 0x0003 // any of these bits set: the rune is cased
	fullCasedMask  = 0x0007 // the complete case mode (bits 2..0)
	ignorableMask  = 0x0006 // masked value equals ignorableValue:
	ignorableValue = 0x0004 // the rune is case-ignorable

	inverseFoldBit = 1 << 7
	isMidBit       = 1 << 6 // same bit as xorIndexBit; see isMid

	exceptionBit     = 1 << 3
	exceptionShift   = 5
	numExceptionBits = 11

	xorIndexBit = 1 << 6
	xorShift    = 8

	// There is no mapping if all xor bits and the exception bit are zero.
	hasMappingMask = 0xff80 | exceptionBit
)
|
||||
|
||||
// The case mode bits encode the case type of a rune. This includes uncased,
// title, upper and lower case and case ignorable. (For a definition of these
// terms see Chapter 3 of The Unicode Standard Core Specification.) In some rare
// cases, a rune can be both cased and case-ignorable. This is encoded by
// cIgnorableCased. A rune of this type is always lower case. Some runes are
// cased while not having a mapping.
//
// A common pattern for scripts in the Unicode standard is for upper and lower
// case runes to alternate for increasing rune values (e.g. the accented Latin
// ranges starting from U+0100 and U+1E00 among others and some Cyrillic
// characters). We use this property by defining a cXORCase mode, where the case
// mode (always upper or lower case) is derived from the rune value. As the XOR
// pattern for case mappings is often identical for successive runes, using
// cXORCase can result in large series of identical trie values. This, in turn,
// allows us to better compress the trie blocks.
const (
	cUncased          info = iota // 000
	cTitle                        // 001
	cLower                        // 010
	cUpper                        // 011
	cIgnorableUncased             // 100
	cIgnorableCased               // 101 // lower case if mappings exist
	cXORCase                      // 11x // case is cLower | ((rune&1) ^ x)

	maxCaseMode = cUpper
)
|
||||
|
||||
// isCased reports whether either of the two low case-mode bits is set. This
// holds for every case mode except cUncased and cIgnorableUncased.
func (c info) isCased() bool {
	return c&casedMask != 0
}

// isCaseIgnorable reports whether the case mode is cIgnorableUncased or
// cIgnorableCased.
func (c info) isCaseIgnorable() bool {
	return c&ignorableMask == ignorableValue
}

// isNotCasedAndNotCaseIgnorable reports whether the case mode is cUncased.
func (c info) isNotCasedAndNotCaseIgnorable() bool {
	return c&fullCasedMask == 0
}

// isCaseIgnorableAndNotCased reports whether the case mode is
// cIgnorableUncased.
func (c info) isCaseIgnorableAndNotCased() bool {
	return c&fullCasedMask == cIgnorableUncased
}

// isMid reports whether the case mode is cIgnorableUncased and isMidBit is
// set. The mode check is needed because the same bit means xorIndexBit for
// other case modes.
func (c info) isMid() bool {
	return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased
}
|
||||
|
||||
// The case mapping implementation will need to know about various Canonical
// Combining Class (CCC) values. We encode two of these in the trie value:
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that
// CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that
// the rune also has the break category Break (see below).
//
// The four values occupy bits 5..4 of an info value.
const (
	cccBreak info = iota << 4
	cccZero
	cccAbove
	cccOther

	cccMask = cccBreak | cccZero | cccAbove | cccOther
)
|
||||
|
||||
const (
|
||||
starter = 0
|
||||
above = 230
|
||||
iotaSubscript = 240
|
||||
)
|
||||
|
||||
// The exceptions slice holds data that does not fit in a normal info entry.
// The entry is pointed to by the exception index in an entry. It has the
// following format:
//
//	Header
//	byte 0:
//	  7..6  unused
//	  5..4  CCC type (same bits as entry)
//	     3  unused
//	  2..0  length of fold
//
//	byte 1:
//	  7..6  unused
//	  5..3  length of 1st mapping of case type
//	  2..0  length of 2nd mapping of case type
//
//	  case     1st    2nd
//	  lower -> upper, title
//	  upper -> lower, title
//	  title -> lower, upper
//
// Lengths with the value 0x7 indicate no value and imply no change.
// A length of 0 indicates a mapping to zero-length string.
//
//	Body bytes:
//	  case folding bytes
//	  lowercase mapping bytes
//	  uppercase mapping bytes
//	  titlecase mapping bytes
//	  closure mapping bytes (for NFKC_Casefold). (TODO)
//
//	Fallbacks:
//	  missing fold  -> lower
//	  missing title -> upper
//	  all missing   -> original rune
//
// exceptions starts with a dummy byte to enforce that there is no zero index
// value.
const (
	lengthMask = 0x07
	lengthBits = 3
	// NOTE(review): noChange is 0, while the text above names 0x7 as the
	// length that implies no change; confirm against the code that uses it.
	noChange = 0
)
|
||||
|
||||
// References to generated trie.
|
||||
|
||||
var trie = newCaseTrie(0)
|
||||
|
||||
var sparse = sparseBlocks{
|
||||
values: sparseValues[:],
|
||||
offsets: sparseOffsets[:],
|
||||
}
|
||||
|
||||
// Sparse block lookup code.
|
||||
|
||||
// valueRange is an entry in a sparse block: every byte in [lo, hi] maps to
// value.
type valueRange struct {
	value  uint16
	lo, hi byte
}

// sparseBlocks holds the sparse part of the trie. Block n consists of the
// entries values[offsets[n]:offsets[n+1]].
type sparseBlocks struct {
	values  []valueRange
	offsets []uint16
}

// lookup returns the value from values block n for byte b using binary
// search. It returns 0 if no range in the block contains b.
func (s *sparseBlocks) lookup(n uint32, b byte) uint16 {
	first, limit := s.offsets[n], s.offsets[n+1]
	for first < limit {
		mid := first + (limit-first)/2
		switch entry := s.values[mid]; {
		case b < entry.lo:
			limit = mid
		case b > entry.hi:
			first = mid + 1
		default:
			return entry.value
		}
	}
	return 0
}
|
||||
|
||||
// lastRuneForTesting is the last rune used for testing. Everything after this
|
||||
// is boring.
|
||||
const lastRuneForTesting = rune(0x1FFFF)
|
52
vendor/golang.org/x/text/internal/gen.go
generated
vendored
52
vendor/golang.org/x/text/internal/gen.go
generated
vendored
|
@ -1,52 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
func main() {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatalf("DecodeZip: %v", err)
|
||||
}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "internal")
|
||||
|
||||
// Create parents table.
|
||||
parents := make([]uint16, language.NumCompactTags)
|
||||
for _, loc := range data.Locales() {
|
||||
tag := language.MustParse(loc)
|
||||
index, ok := language.CompactIndex(tag)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
parentIndex := 0 // und
|
||||
for p := tag.Parent(); p != language.Und; p = p.Parent() {
|
||||
if x, ok := language.CompactIndex(p); ok {
|
||||
parentIndex = x
|
||||
break
|
||||
}
|
||||
}
|
||||
parents[index] = uint16(parentIndex)
|
||||
}
|
||||
|
||||
w.WriteComment(`
|
||||
Parent maps a compact index of a tag to the compact index of the parent of
|
||||
this tag.`)
|
||||
w.WriteVar("Parent", parents)
|
||||
}
|
51
vendor/golang.org/x/text/internal/internal.go
generated
vendored
51
vendor/golang.org/x/text/internal/internal.go
generated
vendored
|
@ -1,51 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go
|
||||
|
||||
// Package internal contains non-exported functionality that is used by
|
||||
// packages in the text repository.
|
||||
package internal // import "golang.org/x/text/internal"
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// SortTags sorts tags in place.
|
||||
func SortTags(tags []language.Tag) {
|
||||
sort.Sort(sorter(tags))
|
||||
}
|
||||
|
||||
type sorter []language.Tag
|
||||
|
||||
func (s sorter) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s sorter) Swap(i, j int) {
|
||||
s[i], s[j] = s[j], s[i]
|
||||
}
|
||||
|
||||
func (s sorter) Less(i, j int) bool {
|
||||
return s[i].String() < s[j].String()
|
||||
}
|
||||
|
||||
// UniqueTags sorts and filters duplicate tags in place and returns a slice with
|
||||
// only unique tags.
|
||||
func UniqueTags(tags []language.Tag) []language.Tag {
|
||||
if len(tags) <= 1 {
|
||||
return tags
|
||||
}
|
||||
SortTags(tags)
|
||||
k := 0
|
||||
for i := 1; i < len(tags); i++ {
|
||||
if tags[k].String() < tags[i].String() {
|
||||
k++
|
||||
tags[k] = tags[i]
|
||||
}
|
||||
}
|
||||
return tags[:k+1]
|
||||
}
|
67
vendor/golang.org/x/text/internal/match.go
generated
vendored
67
vendor/golang.org/x/text/internal/match.go
generated
vendored
|
@ -1,67 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package internal
|
||||
|
||||
// This file contains matchers that implement CLDR inheritance.
|
||||
//
|
||||
// See http://unicode.org/reports/tr35/#Locale_Inheritance.
|
||||
//
|
||||
// Some of the inheritance described in this document is already handled by
|
||||
// the cldr package.
|
||||
|
||||
import (
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// TODO: consider if (some of the) matching algorithm needs to be public after
|
||||
// getting some feel about what is generic and what is specific.
|
||||
|
||||
// NewInheritanceMatcher returns a matcher that matches based on the inheritance
|
||||
// chain.
|
||||
//
|
||||
// The matcher uses canonicalization and the parent relationship to find a
|
||||
// match. The resulting match will always be either Und or a language with the
|
||||
// same language and script as the requested language. It will not match
|
||||
// languages for which there is understood to be mutual or one-directional
|
||||
// intelligibility.
|
||||
//
|
||||
// A Match will indicate an Exact match if the language matches after
|
||||
// canonicalization and High if the matched tag is a parent.
|
||||
func NewInheritanceMatcher(t []language.Tag) *InheritanceMatcher {
|
||||
tags := &InheritanceMatcher{make(map[language.Tag]int)}
|
||||
for i, tag := range t {
|
||||
ct, err := language.All.Canonicalize(tag)
|
||||
if err != nil {
|
||||
ct = tag
|
||||
}
|
||||
tags.index[ct] = i
|
||||
}
|
||||
return tags
|
||||
}
|
||||
|
||||
type InheritanceMatcher struct {
|
||||
index map[language.Tag]int
|
||||
}
|
||||
|
||||
func (m InheritanceMatcher) Match(want ...language.Tag) (language.Tag, int, language.Confidence) {
|
||||
for _, t := range want {
|
||||
ct, err := language.All.Canonicalize(t)
|
||||
if err != nil {
|
||||
ct = t
|
||||
}
|
||||
conf := language.Exact
|
||||
for {
|
||||
if index, ok := m.index[ct]; ok {
|
||||
return ct, index, conf
|
||||
}
|
||||
if ct == language.Und {
|
||||
break
|
||||
}
|
||||
ct = ct.Parent()
|
||||
conf = language.High
|
||||
}
|
||||
}
|
||||
return language.Und, 0, language.No
|
||||
}
|
116
vendor/golang.org/x/text/internal/tables.go
generated
vendored
116
vendor/golang.org/x/text/internal/tables.go
generated
vendored
|
@ -1,116 +0,0 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package internal
|
||||
|
||||
// Parent maps a compact index of a tag to the compact index of the parent of
|
||||
// this tag.
|
||||
var Parent = []uint16{ // 752 elements
|
||||
// Entry 0 - 3F
|
||||
0x0000, 0x0053, 0x00e5, 0x0000, 0x0003, 0x0003, 0x0000, 0x0006,
|
||||
0x0000, 0x0008, 0x0000, 0x000a, 0x0000, 0x000c, 0x000c, 0x000c,
|
||||
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
|
||||
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
|
||||
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
|
||||
0x000c, 0x0000, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x002e,
|
||||
0x0000, 0x0000, 0x0031, 0x0030, 0x0030, 0x0000, 0x0035, 0x0000,
|
||||
0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x003d, 0x0000,
|
||||
// Entry 40 - 7F
|
||||
0x0000, 0x0040, 0x0000, 0x0042, 0x0042, 0x0000, 0x0045, 0x0045,
|
||||
0x0000, 0x0048, 0x0000, 0x004a, 0x0000, 0x0000, 0x004d, 0x004c,
|
||||
0x004c, 0x0000, 0x0051, 0x0051, 0x0051, 0x0051, 0x0000, 0x0056,
|
||||
0x0000, 0x0058, 0x0000, 0x005a, 0x0000, 0x005c, 0x005c, 0x0000,
|
||||
0x005f, 0x0000, 0x0061, 0x0000, 0x0063, 0x0000, 0x0065, 0x0065,
|
||||
0x0000, 0x0068, 0x0000, 0x006a, 0x006a, 0x006a, 0x006a, 0x006a,
|
||||
0x006a, 0x006a, 0x0000, 0x0072, 0x0000, 0x0074, 0x0000, 0x0076,
|
||||
0x0000, 0x0000, 0x0079, 0x0000, 0x007b, 0x0000, 0x007d, 0x0000,
|
||||
// Entry 80 - BF
|
||||
0x007f, 0x007f, 0x0000, 0x0082, 0x0082, 0x0000, 0x0085, 0x0086,
|
||||
0x0086, 0x0086, 0x0085, 0x0087, 0x0086, 0x0086, 0x0086, 0x0085,
|
||||
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087, 0x0086,
|
||||
0x0086, 0x0086, 0x0086, 0x0087, 0x0086, 0x0087, 0x0086, 0x0086,
|
||||
0x0087, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
|
||||
0x0086, 0x0086, 0x0085, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
|
||||
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
|
||||
0x0086, 0x0086, 0x0086, 0x0086, 0x0085, 0x0086, 0x0085, 0x0086,
|
||||
// Entry C0 - FF
|
||||
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087,
|
||||
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0085,
|
||||
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087, 0x0086, 0x0086,
|
||||
0x0087, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
|
||||
0x0086, 0x0086, 0x0086, 0x0086, 0x0085, 0x0085, 0x0086, 0x0086,
|
||||
0x0085, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0000, 0x00ee,
|
||||
0x0000, 0x00f0, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1,
|
||||
0x00f1, 0x00f1, 0x00f0, 0x00f1, 0x00f0, 0x00f0, 0x00f1, 0x00f1,
|
||||
// Entry 100 - 13F
|
||||
0x00f0, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f0, 0x00f1, 0x00f1,
|
||||
0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x0000, 0x010c, 0x0000, 0x010e,
|
||||
0x0000, 0x0110, 0x0000, 0x0112, 0x0112, 0x0000, 0x0115, 0x0115,
|
||||
0x0115, 0x0115, 0x0000, 0x011a, 0x0000, 0x011c, 0x0000, 0x011e,
|
||||
0x011e, 0x0000, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121,
|
||||
0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121,
|
||||
0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121,
|
||||
0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121,
|
||||
// Entry 140 - 17F
|
||||
0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121,
|
||||
0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121,
|
||||
0x0000, 0x0150, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
|
||||
0x0000, 0x0158, 0x0000, 0x015a, 0x015a, 0x015a, 0x0000, 0x015e,
|
||||
0x0000, 0x0000, 0x0161, 0x0000, 0x0163, 0x0000, 0x0165, 0x0165,
|
||||
0x0165, 0x0000, 0x0169, 0x0000, 0x016b, 0x0000, 0x016d, 0x0000,
|
||||
0x016f, 0x016f, 0x0000, 0x0172, 0x0000, 0x0174, 0x0000, 0x0176,
|
||||
0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
|
||||
// Entry 180 - 1BF
|
||||
0x0000, 0x0180, 0x0180, 0x0180, 0x0000, 0x0000, 0x0185, 0x0000,
|
||||
0x0000, 0x0188, 0x0000, 0x018a, 0x0000, 0x0000, 0x018d, 0x0000,
|
||||
0x018f, 0x0000, 0x0000, 0x0192, 0x0000, 0x0000, 0x0195, 0x0000,
|
||||
0x0197, 0x0000, 0x0199, 0x0000, 0x019b, 0x0000, 0x019d, 0x0000,
|
||||
0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
|
||||
0x01a7, 0x0000, 0x01a9, 0x01a9, 0x0000, 0x01ac, 0x0000, 0x01ae,
|
||||
0x0000, 0x01b0, 0x0000, 0x01b2, 0x0000, 0x01b4, 0x0000, 0x0000,
|
||||
0x01b7, 0x0000, 0x01b9, 0x0000, 0x01bb, 0x0000, 0x01bd, 0x0000,
|
||||
// Entry 1C0 - 1FF
|
||||
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x01c3, 0x01c3, 0x01c3,
|
||||
0x0000, 0x01c8, 0x0000, 0x01ca, 0x01ca, 0x0000, 0x01cd, 0x0000,
|
||||
0x01cf, 0x0000, 0x01d1, 0x0000, 0x01d3, 0x0000, 0x01d5, 0x0000,
|
||||
0x01d7, 0x01d7, 0x0000, 0x01da, 0x0000, 0x01dc, 0x0000, 0x01de,
|
||||
0x0000, 0x01e0, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
|
||||
0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x01ec, 0x01ec,
|
||||
0x0000, 0x01f0, 0x0000, 0x01f2, 0x0000, 0x01f4, 0x0000, 0x01f6,
|
||||
0x0000, 0x0000, 0x01f9, 0x0000, 0x01fb, 0x01fb, 0x0000, 0x01fe,
|
||||
// Entry 200 - 23F
|
||||
0x0000, 0x0200, 0x0200, 0x0000, 0x0203, 0x0203, 0x0000, 0x0206,
|
||||
0x0206, 0x0206, 0x0206, 0x0206, 0x0206, 0x0206, 0x0000, 0x020e,
|
||||
0x0000, 0x0210, 0x0000, 0x0212, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0218, 0x0000, 0x0000, 0x021b, 0x0000, 0x021d, 0x021d,
|
||||
0x0000, 0x0220, 0x0000, 0x0222, 0x0222, 0x0000, 0x0000, 0x0226,
|
||||
0x0225, 0x0225, 0x0000, 0x0000, 0x022b, 0x0000, 0x022d, 0x0000,
|
||||
0x022f, 0x0000, 0x023b, 0x0231, 0x023b, 0x023b, 0x023b, 0x023b,
|
||||
0x023b, 0x023b, 0x023b, 0x0231, 0x023b, 0x023b, 0x0000, 0x023e,
|
||||
// Entry 240 - 27F
|
||||
0x023e, 0x023e, 0x0000, 0x0242, 0x0000, 0x0244, 0x0000, 0x0246,
|
||||
0x0246, 0x0000, 0x0249, 0x0000, 0x024b, 0x024b, 0x024b, 0x024b,
|
||||
0x024b, 0x024b, 0x0000, 0x0252, 0x0000, 0x0254, 0x0000, 0x0256,
|
||||
0x0000, 0x0258, 0x0000, 0x025a, 0x0000, 0x0000, 0x025d, 0x025d,
|
||||
0x025d, 0x0000, 0x0261, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
|
||||
0x0000, 0x0268, 0x0267, 0x0267, 0x0000, 0x026c, 0x0000, 0x026e,
|
||||
0x0000, 0x0270, 0x0000, 0x0000, 0x0000, 0x0000, 0x0275, 0x0000,
|
||||
0x0000, 0x0278, 0x0000, 0x027a, 0x027a, 0x027a, 0x027a, 0x0000,
|
||||
// Entry 280 - 2BF
|
||||
0x027f, 0x027f, 0x027f, 0x0000, 0x0283, 0x0283, 0x0283, 0x0283,
|
||||
0x0283, 0x0000, 0x0289, 0x0289, 0x0289, 0x0289, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0291, 0x0291, 0x0291, 0x0000, 0x0295, 0x0295,
|
||||
0x0295, 0x0295, 0x0000, 0x0000, 0x029b, 0x029b, 0x029b, 0x029b,
|
||||
0x0000, 0x02a0, 0x0000, 0x02a2, 0x02a2, 0x0000, 0x02a5, 0x0000,
|
||||
0x02a7, 0x02a7, 0x0000, 0x0000, 0x02ab, 0x0000, 0x0000, 0x02ae,
|
||||
0x0000, 0x02b0, 0x02b0, 0x0000, 0x0000, 0x02b4, 0x0000, 0x02b6,
|
||||
0x0000, 0x02b8, 0x0000, 0x02ba, 0x0000, 0x02bc, 0x02bc, 0x0000,
|
||||
// Entry 2C0 - 2FF
|
||||
0x0000, 0x02c0, 0x0000, 0x02c2, 0x02bf, 0x02bf, 0x0000, 0x0000,
|
||||
0x02c7, 0x02c6, 0x02c6, 0x0000, 0x0000, 0x02cc, 0x0000, 0x02ce,
|
||||
0x0000, 0x02d0, 0x0000, 0x0000, 0x02d3, 0x0000, 0x0000, 0x0000,
|
||||
0x02d7, 0x0000, 0x02d9, 0x0000, 0x02db, 0x0000, 0x02dd, 0x02dd,
|
||||
0x0000, 0x02e0, 0x0000, 0x02e2, 0x0000, 0x02e4, 0x02e4, 0x02e4,
|
||||
0x02e4, 0x02e4, 0x0000, 0x02ea, 0x02eb, 0x02ea, 0x0000, 0x02ee,
|
||||
} // Size: 1528 bytes
|
||||
|
||||
// Total table size 1528 bytes (1KiB); checksum: B99CF952
|
100
vendor/golang.org/x/text/internal/tag/tag.go
generated
vendored
100
vendor/golang.org/x/text/internal/tag/tag.go
generated
vendored
|
@ -1,100 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package tag contains functionality handling tags and related data.
|
||||
package tag // import "golang.org/x/text/internal/tag"
|
||||
|
||||
import "sort"
|
||||
|
||||
// An Index converts tags to a compact numeric value.
//
// All elements are of size 4. Tags may be up to 4 bytes long. Excess bytes can
// be used to store additional information about the tag.
type Index string

// Elem returns the element data at the given index, that is, the 4-byte entry
// starting at byte offset x*4.
func (s Index) Elem(x int) string {
	return string(s[x*4 : x*4+4])
}

// Index reports the index of the given key or -1 if it could not be found.
// Only the first len(key) bytes from the start of the 4-byte entries will be
// considered for the search and the first match in Index will be returned.
//
// The entries are binary searched. If key sorts after every entry, or s holds
// no entries at all, Index returns -1.
func (s Index) Index(key []byte) int {
	n := len(key)
	entries := len(s) / 4
	// Search the index of the first entry with an equal or higher value than
	// key in s.
	index := sort.Search(entries, func(i int) bool {
		return cmp(s[i*4:i*4+n], key) != -1
	})
	// sort.Search returns entries when no entry qualifies; slicing s at that
	// position would run past the end of the string.
	if index >= entries {
		return -1
	}
	i := index * 4
	if cmp(s[i:i+n], key) != 0 {
		return -1
	}
	return index
}

// Next finds the next occurrence of key after index x, which must have been
// obtained from a call to Index using the same key. It returns x+1 or -1.
func (s Index) Next(key []byte, x int) int {
	if x++; x*4 < len(s) && cmp(s[x*4:x*4+len(key)], key) == 0 {
		return x
	}
	return -1
}

// cmp returns an integer comparing a and b lexicographically: -1 if a sorts
// before b, 1 if it sorts after b and 0 if they are equal. When one is a
// prefix of the other, the shorter one sorts first.
func cmp(a Index, b []byte) int {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	for i, c := range b[:n] {
		switch {
		case a[i] > c:
			return 1
		case a[i] < c:
			return -1
		}
	}
	// The common prefix is identical; the lengths decide.
	switch {
	case len(a) < len(b):
		return -1
	case len(a) > len(b):
		return 1
	}
	return 0
}

// Compare returns an integer comparing a and b lexicographically.
func Compare(a string, b []byte) int {
	return cmp(Index(a), b)
}
|
||||
|
||||
// FixCase reformats b to the same pattern of cases as form.
// It returns false if b is malformed, that is, if b and form differ in length
// or b contains a byte that is not an ASCII letter. b is rewritten in place
// one byte at a time, so when false is returned for a bad byte the bytes
// before it have already been converted.
func FixCase(form string, b []byte) bool {
	if len(form) != len(b) {
		return false
	}
	// Distance between the lower and upper case form of an ASCII letter.
	const caseDelta = 'z' - 'Z'
	for i := range b {
		ch := b[i]
		switch wantUpper := form[i] <= 'Z'; {
		case wantUpper:
			if ch >= 'a' {
				ch -= caseDelta
			}
			if !('A' <= ch && ch <= 'Z') {
				return false
			}
		default:
			if ch <= 'Z' {
				ch += caseDelta
			}
			if !('a' <= ch && ch <= 'z') {
				return false
			}
		}
		b[i] = ch
	}
	return true
}
|
16
vendor/golang.org/x/text/language/common.go
generated
vendored
16
vendor/golang.org/x/text/language/common.go
generated
vendored
|
@ -1,16 +0,0 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package language
|
||||
|
||||
// This file contains code common to the maketables.go and the package code.
|
||||
|
||||
// langAliasType is the type of an alias in langAliasMap.
|
||||
type langAliasType int8
|
||||
|
||||
const (
|
||||
langDeprecated langAliasType = iota
|
||||
langMacro
|
||||
langLegacy
|
||||
|
||||
langAliasTypeUnknown langAliasType = -1
|
||||
)
|
197
vendor/golang.org/x/text/language/coverage.go
generated
vendored
197
vendor/golang.org/x/text/language/coverage.go
generated
vendored
|
@ -1,197 +0,0 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// The Coverage interface is used to define the level of coverage of an
|
||||
// internationalization service. Note that not all types are supported by all
|
||||
// services. As lists may be generated on the fly, it is recommended that users
|
||||
// of a Coverage cache the results.
|
||||
type Coverage interface {
|
||||
// Tags returns the list of supported tags.
|
||||
Tags() []Tag
|
||||
|
||||
// BaseLanguages returns the list of supported base languages.
|
||||
BaseLanguages() []Base
|
||||
|
||||
// Scripts returns the list of supported scripts.
|
||||
Scripts() []Script
|
||||
|
||||
// Regions returns the list of supported regions.
|
||||
Regions() []Region
|
||||
}
|
||||
|
||||
var (
|
||||
// Supported defines a Coverage that lists all supported subtags. Tags
|
||||
// always returns nil.
|
||||
Supported Coverage = allSubtags{}
|
||||
)
|
||||
|
||||
// TODO:
|
||||
// - Support Variants, numbering systems.
|
||||
// - CLDR coverage levels.
|
||||
// - Set of common tags defined in this package.
|
||||
|
||||
type allSubtags struct{}
|
||||
|
||||
// Regions returns the list of supported regions. As all regions are in a
|
||||
// consecutive range, it simply returns a slice of numbers in increasing order.
|
||||
// The "undefined" region is not returned.
|
||||
func (s allSubtags) Regions() []Region {
|
||||
reg := make([]Region, numRegions)
|
||||
for i := range reg {
|
||||
reg[i] = Region{regionID(i + 1)}
|
||||
}
|
||||
return reg
|
||||
}
|
||||
|
||||
// Scripts returns the list of supported scripts. As all scripts are in a
|
||||
// consecutive range, it simply returns a slice of numbers in increasing order.
|
||||
// The "undefined" script is not returned.
|
||||
func (s allSubtags) Scripts() []Script {
|
||||
scr := make([]Script, numScripts)
|
||||
for i := range scr {
|
||||
scr[i] = Script{scriptID(i + 1)}
|
||||
}
|
||||
return scr
|
||||
}
|
||||
|
||||
// BaseLanguages returns the list of all supported base languages. It generates
|
||||
// the list by traversing the internal structures.
|
||||
func (s allSubtags) BaseLanguages() []Base {
|
||||
base := make([]Base, 0, numLanguages)
|
||||
for i := 0; i < langNoIndexOffset; i++ {
|
||||
// We included "und" already for the value 0.
|
||||
if i != nonCanonicalUnd {
|
||||
base = append(base, Base{langID(i)})
|
||||
}
|
||||
}
|
||||
i := langNoIndexOffset
|
||||
for _, v := range langNoIndex {
|
||||
for k := 0; k < 8; k++ {
|
||||
if v&1 == 1 {
|
||||
base = append(base, Base{langID(i)})
|
||||
}
|
||||
v >>= 1
|
||||
i++
|
||||
}
|
||||
}
|
||||
return base
|
||||
}
|
||||
|
||||
// Tags always returns nil.
|
||||
func (s allSubtags) Tags() []Tag {
|
||||
return nil
|
||||
}
|
||||
|
||||
// coverage is used used by NewCoverage which is used as a convenient way for
|
||||
// creating Coverage implementations for partially defined data. Very often a
|
||||
// package will only need to define a subset of slices. coverage provides a
|
||||
// convenient way to do this. Moreover, packages using NewCoverage, instead of
|
||||
// their own implementation, will not break if later new slice types are added.
|
||||
type coverage struct {
|
||||
tags func() []Tag
|
||||
bases func() []Base
|
||||
scripts func() []Script
|
||||
regions func() []Region
|
||||
}
|
||||
|
||||
func (s *coverage) Tags() []Tag {
|
||||
if s.tags == nil {
|
||||
return nil
|
||||
}
|
||||
return s.tags()
|
||||
}
|
||||
|
||||
// bases implements sort.Interface and is used to sort base languages.
|
||||
type bases []Base
|
||||
|
||||
func (b bases) Len() int {
|
||||
return len(b)
|
||||
}
|
||||
|
||||
func (b bases) Swap(i, j int) {
|
||||
b[i], b[j] = b[j], b[i]
|
||||
}
|
||||
|
||||
func (b bases) Less(i, j int) bool {
|
||||
return b[i].langID < b[j].langID
|
||||
}
|
||||
|
||||
// BaseLanguages returns the result from calling s.bases if it is specified or
|
||||
// otherwise derives the set of supported base languages from tags.
|
||||
func (s *coverage) BaseLanguages() []Base {
|
||||
if s.bases == nil {
|
||||
tags := s.Tags()
|
||||
if len(tags) == 0 {
|
||||
return nil
|
||||
}
|
||||
a := make([]Base, len(tags))
|
||||
for i, t := range tags {
|
||||
a[i] = Base{langID(t.lang)}
|
||||
}
|
||||
sort.Sort(bases(a))
|
||||
k := 0
|
||||
for i := 1; i < len(a); i++ {
|
||||
if a[k] != a[i] {
|
||||
k++
|
||||
a[k] = a[i]
|
||||
}
|
||||
}
|
||||
return a[:k+1]
|
||||
}
|
||||
return s.bases()
|
||||
}
|
||||
|
||||
func (s *coverage) Scripts() []Script {
|
||||
if s.scripts == nil {
|
||||
return nil
|
||||
}
|
||||
return s.scripts()
|
||||
}
|
||||
|
||||
func (s *coverage) Regions() []Region {
|
||||
if s.regions == nil {
|
||||
return nil
|
||||
}
|
||||
return s.regions()
|
||||
}
|
||||
|
||||
// NewCoverage returns a Coverage for the given lists. It is typically used by
|
||||
// packages providing internationalization services to define their level of
|
||||
// coverage. A list may be of type []T or func() []T, where T is either Tag,
|
||||
// Base, Script or Region. The returned Coverage derives the value for Bases
|
||||
// from Tags if no func or slice for []Base is specified. For other unspecified
|
||||
// types the returned Coverage will return nil for the respective methods.
|
||||
func NewCoverage(list ...interface{}) Coverage {
|
||||
s := &coverage{}
|
||||
for _, x := range list {
|
||||
switch v := x.(type) {
|
||||
case func() []Base:
|
||||
s.bases = v
|
||||
case func() []Script:
|
||||
s.scripts = v
|
||||
case func() []Region:
|
||||
s.regions = v
|
||||
case func() []Tag:
|
||||
s.tags = v
|
||||
case []Base:
|
||||
s.bases = func() []Base { return v }
|
||||
case []Script:
|
||||
s.scripts = func() []Script { return v }
|
||||
case []Region:
|
||||
s.regions = func() []Region { return v }
|
||||
case []Tag:
|
||||
s.tags = func() []Tag { return v }
|
||||
default:
|
||||
panic(fmt.Sprintf("language: unsupported set type %T", v))
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
20
vendor/golang.org/x/text/language/gen_common.go
generated
vendored
20
vendor/golang.org/x/text/language/gen_common.go
generated
vendored
|
@ -1,20 +0,0 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This file contains code common to the maketables.go and the package code.
|
||||
|
||||
// langAliasType is the type of an alias in langAliasMap.
|
||||
type langAliasType int8
|
||||
|
||||
const (
|
||||
langDeprecated langAliasType = iota
|
||||
langMacro
|
||||
langLegacy
|
||||
|
||||
langAliasTypeUnknown langAliasType = -1
|
||||
)
|
162
vendor/golang.org/x/text/language/gen_index.go
generated
vendored
162
vendor/golang.org/x/text/language/gen_index.go
generated
vendored
|
@ -1,162 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This file generates derivative tables based on the language package itself.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
var (
|
||||
test = flag.Bool("test", false,
|
||||
"test existing tables; can be used to compare web data with package data.")
|
||||
|
||||
draft = flag.String("draft",
|
||||
"contributed",
|
||||
`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
|
||||
)
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
// Read the CLDR zip file.
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatalf("DecodeZip: %v", err)
|
||||
}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer func() {
|
||||
buf := &bytes.Buffer{}
|
||||
|
||||
if _, err = w.WriteGo(buf, "language"); err != nil {
|
||||
log.Fatalf("Error formatting file index.go: %v", err)
|
||||
}
|
||||
|
||||
// Since we're generating a table for our own package we need to rewrite
|
||||
// doing the equivalent of go fmt -r 'language.b -> b'. Using
|
||||
// bytes.Replace will do.
|
||||
out := bytes.Replace(buf.Bytes(), []byte("language."), nil, -1)
|
||||
if err := ioutil.WriteFile("index.go", out, 0600); err != nil {
|
||||
log.Fatalf("Could not create file index.go: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
m := map[language.Tag]bool{}
|
||||
for _, lang := range data.Locales() {
|
||||
// We include all locales unconditionally to be consistent with en_US.
|
||||
// We want en_US, even though it has no data associated with it.
|
||||
|
||||
// TODO: put any of the languages for which no data exists at the end
|
||||
// of the index. This allows all components based on ICU to use that
|
||||
// as the cutoff point.
|
||||
// if x := data.RawLDML(lang); false ||
|
||||
// x.LocaleDisplayNames != nil ||
|
||||
// x.Characters != nil ||
|
||||
// x.Delimiters != nil ||
|
||||
// x.Measurement != nil ||
|
||||
// x.Dates != nil ||
|
||||
// x.Numbers != nil ||
|
||||
// x.Units != nil ||
|
||||
// x.ListPatterns != nil ||
|
||||
// x.Collations != nil ||
|
||||
// x.Segmentations != nil ||
|
||||
// x.Rbnf != nil ||
|
||||
// x.Annotations != nil ||
|
||||
// x.Metadata != nil {
|
||||
|
||||
// TODO: support POSIX natively, albeit non-standard.
|
||||
tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
|
||||
m[tag] = true
|
||||
// }
|
||||
}
|
||||
// Include locales for plural rules, which uses a different structure.
|
||||
for _, plurals := range data.Supplemental().Plurals {
|
||||
for _, rules := range plurals.PluralRules {
|
||||
for _, lang := range strings.Split(rules.Locales, " ") {
|
||||
m[language.Make(lang)] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var core, special []language.Tag
|
||||
|
||||
for t := range m {
|
||||
if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
|
||||
log.Fatalf("Unexpected extension %v in %v", x, t)
|
||||
}
|
||||
if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
|
||||
core = append(core, t)
|
||||
} else {
|
||||
special = append(special, t)
|
||||
}
|
||||
}
|
||||
|
||||
w.WriteComment(`
|
||||
NumCompactTags is the number of common tags. The maximum tag is
|
||||
NumCompactTags-1.`)
|
||||
w.WriteConst("NumCompactTags", len(core)+len(special))
|
||||
|
||||
sort.Sort(byAlpha(special))
|
||||
w.WriteVar("specialTags", special)
|
||||
|
||||
// TODO: order by frequency?
|
||||
sort.Sort(byAlpha(core))
|
||||
|
||||
// Size computations are just an estimate.
|
||||
w.Size += int(reflect.TypeOf(map[uint32]uint16{}).Size())
|
||||
w.Size += len(core) * 6 // size of uint32 and uint16
|
||||
|
||||
fmt.Fprintln(w)
|
||||
fmt.Fprintln(w, "var coreTags = map[uint32]uint16{")
|
||||
fmt.Fprintln(w, "0x0: 0, // und")
|
||||
i := len(special) + 1 // Und and special tags already written.
|
||||
for _, t := range core {
|
||||
if t == language.Und {
|
||||
continue
|
||||
}
|
||||
fmt.Fprint(w.Hash, t, i)
|
||||
b, s, r := t.Raw()
|
||||
fmt.Fprintf(w, "0x%s%s%s: %d, // %s\n",
|
||||
getIndex(b, 3), // 3 is enough as it is guaranteed to be a compact number
|
||||
getIndex(s, 2),
|
||||
getIndex(r, 3),
|
||||
i, t)
|
||||
i++
|
||||
}
|
||||
fmt.Fprintln(w, "}")
|
||||
}
|
||||
|
||||
// getIndex formats the subtag x with %#v and extracts the hexadecimal digits
// that follow the first "0x", dropping the final character of the formatted
// value (the closing brace). If there are fewer than n digits, the result is
// prefixed with 0s up to a width of n nibbles.
func getIndex(x interface{}, n int) string {
	repr := fmt.Sprintf("%#v", x) // repr is of form Type{typeID: 0x00}
	start := strings.Index(repr, "0x") + 2
	digits := repr[start : len(repr)-1]
	padding := strings.Repeat("0", n-len(digits))
	return padding + digits
}
|
||||
|
||||
type byAlpha []language.Tag
|
||||
|
||||
func (a byAlpha) Len() int { return len(a) }
|
||||
func (a byAlpha) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
||||
func (a byAlpha) Less(i, j int) bool { return a[i].String() < a[j].String() }
|
38
vendor/golang.org/x/text/language/go1_1.go
generated
vendored
38
vendor/golang.org/x/text/language/go1_1.go
generated
vendored
|
@ -1,38 +0,0 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !go1.2
|
||||
|
||||
package language
|
||||
|
||||
import "sort"
|
||||
|
||||
// sortStable sorts s with sort.Sort while keeping elements that compare equal
// in their original relative order. It does so by sorting a wrapper that
// breaks ties on each element's starting position.
func sortStable(s sort.Interface) {
	order := make([]int, s.Len())
	for i := range order {
		order[i] = i
	}
	sort.Sort(&stableSort{s: s, pos: order})
}

// stableSort wraps a sort.Interface together with pos, which records for each
// slot the position its current element had before sorting began.
type stableSort struct {
	s   sort.Interface
	pos []int
}

// Len returns the number of elements being sorted.
func (s *stableSort) Len() int {
	return len(s.pos)
}

// Less orders elements by the wrapped Less and, when neither element is less
// than the other, by their original position.
func (s *stableSort) Less(i, j int) bool {
	if s.s.Less(i, j) {
		return true
	}
	if s.s.Less(j, i) {
		return false
	}
	return s.pos[i] < s.pos[j]
}

// Swap exchanges both the wrapped elements and their recorded positions.
func (s *stableSort) Swap(i, j int) {
	s.pos[i], s.pos[j] = s.pos[j], s.pos[i]
	s.s.Swap(i, j)
}
|
11
vendor/golang.org/x/text/language/go1_2.go
generated
vendored
11
vendor/golang.org/x/text/language/go1_2.go
generated
vendored
|
@ -1,11 +0,0 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build go1.2
|
||||
|
||||
package language
|
||||
|
||||
import "sort"
|
||||
|
||||
var sortStable = sort.Stable
|
767
vendor/golang.org/x/text/language/index.go
generated
vendored
767
vendor/golang.org/x/text/language/index.go
generated
vendored
|
@ -1,767 +0,0 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package language
|
||||
|
||||
// NumCompactTags is the number of common tags. The maximum tag is
|
||||
// NumCompactTags-1.
|
||||
const NumCompactTags = 752
|
||||
|
||||
var specialTags = []Tag{ // 2 elements
|
||||
0: {lang: 0xd5, region: 0x6d, script: 0x0, pVariant: 0x5, pExt: 0xe, str: "ca-ES-valencia"},
|
||||
1: {lang: 0x134, region: 0x134, script: 0x0, pVariant: 0x5, pExt: 0x5, str: "en-US-u-va-posix"},
|
||||
} // Size: 72 bytes
|
||||
|
||||
var coreTags = map[uint32]uint16{
|
||||
0x0: 0, // und
|
||||
0x01500000: 3, // af
|
||||
0x015000d1: 4, // af-NA
|
||||
0x01500160: 5, // af-ZA
|
||||
0x01b00000: 6, // agq
|
||||
0x01b00051: 7, // agq-CM
|
||||
0x02000000: 8, // ak
|
||||
0x0200007f: 9, // ak-GH
|
||||
0x02600000: 10, // am
|
||||
0x0260006e: 11, // am-ET
|
||||
0x03900000: 12, // ar
|
||||
0x03900001: 13, // ar-001
|
||||
0x03900022: 14, // ar-AE
|
||||
0x03900038: 15, // ar-BH
|
||||
0x03900061: 16, // ar-DJ
|
||||
0x03900066: 17, // ar-DZ
|
||||
0x0390006a: 18, // ar-EG
|
||||
0x0390006b: 19, // ar-EH
|
||||
0x0390006c: 20, // ar-ER
|
||||
0x03900096: 21, // ar-IL
|
||||
0x0390009a: 22, // ar-IQ
|
||||
0x039000a0: 23, // ar-JO
|
||||
0x039000a7: 24, // ar-KM
|
||||
0x039000ab: 25, // ar-KW
|
||||
0x039000af: 26, // ar-LB
|
||||
0x039000b8: 27, // ar-LY
|
||||
0x039000b9: 28, // ar-MA
|
||||
0x039000c8: 29, // ar-MR
|
||||
0x039000e0: 30, // ar-OM
|
||||
0x039000ec: 31, // ar-PS
|
||||
0x039000f2: 32, // ar-QA
|
||||
0x03900107: 33, // ar-SA
|
||||
0x0390010a: 34, // ar-SD
|
||||
0x03900114: 35, // ar-SO
|
||||
0x03900116: 36, // ar-SS
|
||||
0x0390011b: 37, // ar-SY
|
||||
0x0390011f: 38, // ar-TD
|
||||
0x03900127: 39, // ar-TN
|
||||
0x0390015d: 40, // ar-YE
|
||||
0x03f00000: 41, // ars
|
||||
0x04200000: 42, // as
|
||||
0x04200098: 43, // as-IN
|
||||
0x04300000: 44, // asa
|
||||
0x0430012e: 45, // asa-TZ
|
||||
0x04700000: 46, // ast
|
||||
0x0470006d: 47, // ast-ES
|
||||
0x05700000: 48, // az
|
||||
0x0571e000: 49, // az-Cyrl
|
||||
0x0571e031: 50, // az-Cyrl-AZ
|
||||
0x05752000: 51, // az-Latn
|
||||
0x05752031: 52, // az-Latn-AZ
|
||||
0x05d00000: 53, // bas
|
||||
0x05d00051: 54, // bas-CM
|
||||
0x07000000: 55, // be
|
||||
0x07000046: 56, // be-BY
|
||||
0x07400000: 57, // bem
|
||||
0x07400161: 58, // bem-ZM
|
||||
0x07800000: 59, // bez
|
||||
0x0780012e: 60, // bez-TZ
|
||||
0x07d00000: 61, // bg
|
||||
0x07d00037: 62, // bg-BG
|
||||
0x08100000: 63, // bh
|
||||
0x09e00000: 64, // bm
|
||||
0x09e000c2: 65, // bm-ML
|
||||
0x0a300000: 66, // bn
|
||||
0x0a300034: 67, // bn-BD
|
||||
0x0a300098: 68, // bn-IN
|
||||
0x0a700000: 69, // bo
|
||||
0x0a700052: 70, // bo-CN
|
||||
0x0a700098: 71, // bo-IN
|
||||
0x0b000000: 72, // br
|
||||
0x0b000077: 73, // br-FR
|
||||
0x0b300000: 74, // brx
|
||||
0x0b300098: 75, // brx-IN
|
||||
0x0b500000: 76, // bs
|
||||
0x0b51e000: 77, // bs-Cyrl
|
||||
0x0b51e032: 78, // bs-Cyrl-BA
|
||||
0x0b552000: 79, // bs-Latn
|
||||
0x0b552032: 80, // bs-Latn-BA
|
||||
0x0d500000: 81, // ca
|
||||
0x0d500021: 82, // ca-AD
|
||||
0x0d50006d: 83, // ca-ES
|
||||
0x0d500077: 84, // ca-FR
|
||||
0x0d50009d: 85, // ca-IT
|
||||
0x0da00000: 86, // ce
|
||||
0x0da00105: 87, // ce-RU
|
||||
0x0dd00000: 88, // cgg
|
||||
0x0dd00130: 89, // cgg-UG
|
||||
0x0e300000: 90, // chr
|
||||
0x0e300134: 91, // chr-US
|
||||
0x0e700000: 92, // ckb
|
||||
0x0e70009a: 93, // ckb-IQ
|
||||
0x0e70009b: 94, // ckb-IR
|
||||
0x0f600000: 95, // cs
|
||||
0x0f60005d: 96, // cs-CZ
|
||||
0x0fa00000: 97, // cu
|
||||
0x0fa00105: 98, // cu-RU
|
||||
0x0fc00000: 99, // cy
|
||||
0x0fc0007a: 100, // cy-GB
|
||||
0x0fd00000: 101, // da
|
||||
0x0fd00062: 102, // da-DK
|
||||
0x0fd00081: 103, // da-GL
|
||||
0x10400000: 104, // dav
|
||||
0x104000a3: 105, // dav-KE
|
||||
0x10900000: 106, // de
|
||||
0x1090002d: 107, // de-AT
|
||||
0x10900035: 108, // de-BE
|
||||
0x1090004d: 109, // de-CH
|
||||
0x1090005f: 110, // de-DE
|
||||
0x1090009d: 111, // de-IT
|
||||
0x109000b1: 112, // de-LI
|
||||
0x109000b6: 113, // de-LU
|
||||
0x11300000: 114, // dje
|
||||
0x113000d3: 115, // dje-NE
|
||||
0x11b00000: 116, // dsb
|
||||
0x11b0005f: 117, // dsb-DE
|
||||
0x12000000: 118, // dua
|
||||
0x12000051: 119, // dua-CM
|
||||
0x12400000: 120, // dv
|
||||
0x12700000: 121, // dyo
|
||||
0x12700113: 122, // dyo-SN
|
||||
0x12900000: 123, // dz
|
||||
0x12900042: 124, // dz-BT
|
||||
0x12b00000: 125, // ebu
|
||||
0x12b000a3: 126, // ebu-KE
|
||||
0x12c00000: 127, // ee
|
||||
0x12c0007f: 128, // ee-GH
|
||||
0x12c00121: 129, // ee-TG
|
||||
0x13100000: 130, // el
|
||||
0x1310005c: 131, // el-CY
|
||||
0x13100086: 132, // el-GR
|
||||
0x13400000: 133, // en
|
||||
0x13400001: 134, // en-001
|
||||
0x1340001a: 135, // en-150
|
||||
0x13400024: 136, // en-AG
|
||||
0x13400025: 137, // en-AI
|
||||
0x1340002c: 138, // en-AS
|
||||
0x1340002d: 139, // en-AT
|
||||
0x1340002e: 140, // en-AU
|
||||
0x13400033: 141, // en-BB
|
||||
0x13400035: 142, // en-BE
|
||||
0x13400039: 143, // en-BI
|
||||
0x1340003c: 144, // en-BM
|
||||
0x13400041: 145, // en-BS
|
||||
0x13400045: 146, // en-BW
|
||||
0x13400047: 147, // en-BZ
|
||||
0x13400048: 148, // en-CA
|
||||
0x13400049: 149, // en-CC
|
||||
0x1340004d: 150, // en-CH
|
||||
0x1340004f: 151, // en-CK
|
||||
0x13400051: 152, // en-CM
|
||||
0x1340005b: 153, // en-CX
|
||||
0x1340005c: 154, // en-CY
|
||||
0x1340005f: 155, // en-DE
|
||||
0x13400060: 156, // en-DG
|
||||
0x13400062: 157, // en-DK
|
||||
0x13400063: 158, // en-DM
|
||||
0x1340006c: 159, // en-ER
|
||||
0x13400071: 160, // en-FI
|
||||
0x13400072: 161, // en-FJ
|
||||
0x13400073: 162, // en-FK
|
||||
0x13400074: 163, // en-FM
|
||||
0x1340007a: 164, // en-GB
|
||||
0x1340007b: 165, // en-GD
|
||||
0x1340007e: 166, // en-GG
|
||||
0x1340007f: 167, // en-GH
|
||||
0x13400080: 168, // en-GI
|
||||
0x13400082: 169, // en-GM
|
||||
0x13400089: 170, // en-GU
|
||||
0x1340008b: 171, // en-GY
|
||||
0x1340008c: 172, // en-HK
|
||||
0x13400095: 173, // en-IE
|
||||
0x13400096: 174, // en-IL
|
||||
0x13400097: 175, // en-IM
|
||||
0x13400098: 176, // en-IN
|
||||
0x13400099: 177, // en-IO
|
||||
0x1340009e: 178, // en-JE
|
||||
0x1340009f: 179, // en-JM
|
||||
0x134000a3: 180, // en-KE
|
||||
0x134000a6: 181, // en-KI
|
||||
0x134000a8: 182, // en-KN
|
||||
0x134000ac: 183, // en-KY
|
||||
0x134000b0: 184, // en-LC
|
||||
0x134000b3: 185, // en-LR
|
||||
0x134000b4: 186, // en-LS
|
||||
0x134000be: 187, // en-MG
|
||||
0x134000bf: 188, // en-MH
|
||||
0x134000c5: 189, // en-MO
|
||||
0x134000c6: 190, // en-MP
|
||||
0x134000c9: 191, // en-MS
|
||||
0x134000ca: 192, // en-MT
|
||||
0x134000cb: 193, // en-MU
|
||||
0x134000cd: 194, // en-MW
|
||||
0x134000cf: 195, // en-MY
|
||||
0x134000d1: 196, // en-NA
|
||||
0x134000d4: 197, // en-NF
|
||||
0x134000d5: 198, // en-NG
|
||||
0x134000d8: 199, // en-NL
|
||||
0x134000dc: 200, // en-NR
|
||||
0x134000de: 201, // en-NU
|
||||
0x134000df: 202, // en-NZ
|
||||
0x134000e5: 203, // en-PG
|
||||
0x134000e6: 204, // en-PH
|
||||
0x134000e7: 205, // en-PK
|
||||
0x134000ea: 206, // en-PN
|
||||
0x134000eb: 207, // en-PR
|
||||
0x134000ef: 208, // en-PW
|
||||
0x13400106: 209, // en-RW
|
||||
0x13400108: 210, // en-SB
|
||||
0x13400109: 211, // en-SC
|
||||
0x1340010a: 212, // en-SD
|
||||
0x1340010b: 213, // en-SE
|
||||
0x1340010c: 214, // en-SG
|
||||
0x1340010d: 215, // en-SH
|
||||
0x1340010e: 216, // en-SI
|
||||
0x13400111: 217, // en-SL
|
||||
0x13400116: 218, // en-SS
|
||||
0x1340011a: 219, // en-SX
|
||||
0x1340011c: 220, // en-SZ
|
||||
0x1340011e: 221, // en-TC
|
||||
0x13400124: 222, // en-TK
|
||||
0x13400128: 223, // en-TO
|
||||
0x1340012b: 224, // en-TT
|
||||
0x1340012c: 225, // en-TV
|
||||
0x1340012e: 226, // en-TZ
|
||||
0x13400130: 227, // en-UG
|
||||
0x13400132: 228, // en-UM
|
||||
0x13400134: 229, // en-US
|
||||
0x13400138: 230, // en-VC
|
||||
0x1340013b: 231, // en-VG
|
||||
0x1340013c: 232, // en-VI
|
||||
0x1340013e: 233, // en-VU
|
||||
0x13400141: 234, // en-WS
|
||||
0x13400160: 235, // en-ZA
|
||||
0x13400161: 236, // en-ZM
|
||||
0x13400163: 237, // en-ZW
|
||||
0x13700000: 238, // eo
|
||||
0x13700001: 239, // eo-001
|
||||
0x13900000: 240, // es
|
||||
0x1390001e: 241, // es-419
|
||||
0x1390002b: 242, // es-AR
|
||||
0x1390003e: 243, // es-BO
|
||||
0x13900040: 244, // es-BR
|
||||
0x13900050: 245, // es-CL
|
||||
0x13900053: 246, // es-CO
|
||||
0x13900055: 247, // es-CR
|
||||
0x13900058: 248, // es-CU
|
||||
0x13900064: 249, // es-DO
|
||||
0x13900067: 250, // es-EA
|
||||
0x13900068: 251, // es-EC
|
||||
0x1390006d: 252, // es-ES
|
||||
0x13900085: 253, // es-GQ
|
||||
0x13900088: 254, // es-GT
|
||||
0x1390008e: 255, // es-HN
|
||||
0x13900093: 256, // es-IC
|
||||
0x139000ce: 257, // es-MX
|
||||
0x139000d7: 258, // es-NI
|
||||
0x139000e1: 259, // es-PA
|
||||
0x139000e3: 260, // es-PE
|
||||
0x139000e6: 261, // es-PH
|
||||
0x139000eb: 262, // es-PR
|
||||
0x139000f0: 263, // es-PY
|
||||
0x13900119: 264, // es-SV
|
||||
0x13900134: 265, // es-US
|
||||
0x13900135: 266, // es-UY
|
||||
0x1390013a: 267, // es-VE
|
||||
0x13b00000: 268, // et
|
||||
0x13b00069: 269, // et-EE
|
||||
0x14000000: 270, // eu
|
||||
0x1400006d: 271, // eu-ES
|
||||
0x14100000: 272, // ewo
|
||||
0x14100051: 273, // ewo-CM
|
||||
0x14300000: 274, // fa
|
||||
0x14300023: 275, // fa-AF
|
||||
0x1430009b: 276, // fa-IR
|
||||
0x14900000: 277, // ff
|
||||
0x14900051: 278, // ff-CM
|
||||
0x14900083: 279, // ff-GN
|
||||
0x149000c8: 280, // ff-MR
|
||||
0x14900113: 281, // ff-SN
|
||||
0x14c00000: 282, // fi
|
||||
0x14c00071: 283, // fi-FI
|
||||
0x14e00000: 284, // fil
|
||||
0x14e000e6: 285, // fil-PH
|
||||
0x15300000: 286, // fo
|
||||
0x15300062: 287, // fo-DK
|
||||
0x15300075: 288, // fo-FO
|
||||
0x15900000: 289, // fr
|
||||
0x15900035: 290, // fr-BE
|
||||
0x15900036: 291, // fr-BF
|
||||
0x15900039: 292, // fr-BI
|
||||
0x1590003a: 293, // fr-BJ
|
||||
0x1590003b: 294, // fr-BL
|
||||
0x15900048: 295, // fr-CA
|
||||
0x1590004a: 296, // fr-CD
|
||||
0x1590004b: 297, // fr-CF
|
||||
0x1590004c: 298, // fr-CG
|
||||
0x1590004d: 299, // fr-CH
|
||||
0x1590004e: 300, // fr-CI
|
||||
0x15900051: 301, // fr-CM
|
||||
0x15900061: 302, // fr-DJ
|
||||
0x15900066: 303, // fr-DZ
|
||||
0x15900077: 304, // fr-FR
|
||||
0x15900079: 305, // fr-GA
|
||||
0x1590007d: 306, // fr-GF
|
||||
0x15900083: 307, // fr-GN
|
||||
0x15900084: 308, // fr-GP
|
||||
0x15900085: 309, // fr-GQ
|
||||
0x15900090: 310, // fr-HT
|
||||
0x159000a7: 311, // fr-KM
|
||||
0x159000b6: 312, // fr-LU
|
||||
0x159000b9: 313, // fr-MA
|
||||
0x159000ba: 314, // fr-MC
|
||||
0x159000bd: 315, // fr-MF
|
||||
0x159000be: 316, // fr-MG
|
||||
0x159000c2: 317, // fr-ML
|
||||
0x159000c7: 318, // fr-MQ
|
||||
0x159000c8: 319, // fr-MR
|
||||
0x159000cb: 320, // fr-MU
|
||||
0x159000d2: 321, // fr-NC
|
||||
0x159000d3: 322, // fr-NE
|
||||
0x159000e4: 323, // fr-PF
|
||||
0x159000e9: 324, // fr-PM
|
||||
0x15900101: 325, // fr-RE
|
||||
0x15900106: 326, // fr-RW
|
||||
0x15900109: 327, // fr-SC
|
||||
0x15900113: 328, // fr-SN
|
||||
0x1590011b: 329, // fr-SY
|
||||
0x1590011f: 330, // fr-TD
|
||||
0x15900121: 331, // fr-TG
|
||||
0x15900127: 332, // fr-TN
|
||||
0x1590013e: 333, // fr-VU
|
||||
0x1590013f: 334, // fr-WF
|
||||
0x1590015e: 335, // fr-YT
|
||||
0x16400000: 336, // fur
|
||||
0x1640009d: 337, // fur-IT
|
||||
0x16800000: 338, // fy
|
||||
0x168000d8: 339, // fy-NL
|
||||
0x16900000: 340, // ga
|
||||
0x16900095: 341, // ga-IE
|
||||
0x17800000: 342, // gd
|
||||
0x1780007a: 343, // gd-GB
|
||||
0x18a00000: 344, // gl
|
||||
0x18a0006d: 345, // gl-ES
|
||||
0x19c00000: 346, // gsw
|
||||
0x19c0004d: 347, // gsw-CH
|
||||
0x19c00077: 348, // gsw-FR
|
||||
0x19c000b1: 349, // gsw-LI
|
||||
0x19d00000: 350, // gu
|
||||
0x19d00098: 351, // gu-IN
|
||||
0x1a200000: 352, // guw
|
||||
0x1a400000: 353, // guz
|
||||
0x1a4000a3: 354, // guz-KE
|
||||
0x1a500000: 355, // gv
|
||||
0x1a500097: 356, // gv-IM
|
||||
0x1ad00000: 357, // ha
|
||||
0x1ad0007f: 358, // ha-GH
|
||||
0x1ad000d3: 359, // ha-NE
|
||||
0x1ad000d5: 360, // ha-NG
|
||||
0x1b100000: 361, // haw
|
||||
0x1b100134: 362, // haw-US
|
||||
0x1b500000: 363, // he
|
||||
0x1b500096: 364, // he-IL
|
||||
0x1b700000: 365, // hi
|
||||
0x1b700098: 366, // hi-IN
|
||||
0x1ca00000: 367, // hr
|
||||
0x1ca00032: 368, // hr-BA
|
||||
0x1ca0008f: 369, // hr-HR
|
||||
0x1cb00000: 370, // hsb
|
||||
0x1cb0005f: 371, // hsb-DE
|
||||
0x1ce00000: 372, // hu
|
||||
0x1ce00091: 373, // hu-HU
|
||||
0x1d000000: 374, // hy
|
||||
0x1d000027: 375, // hy-AM
|
||||
0x1da00000: 376, // id
|
||||
0x1da00094: 377, // id-ID
|
||||
0x1df00000: 378, // ig
|
||||
0x1df000d5: 379, // ig-NG
|
||||
0x1e200000: 380, // ii
|
||||
0x1e200052: 381, // ii-CN
|
||||
0x1f000000: 382, // is
|
||||
0x1f00009c: 383, // is-IS
|
||||
0x1f100000: 384, // it
|
||||
0x1f10004d: 385, // it-CH
|
||||
0x1f10009d: 386, // it-IT
|
||||
0x1f100112: 387, // it-SM
|
||||
0x1f200000: 388, // iu
|
||||
0x1f800000: 389, // ja
|
||||
0x1f8000a1: 390, // ja-JP
|
||||
0x1fb00000: 391, // jbo
|
||||
0x1ff00000: 392, // jgo
|
||||
0x1ff00051: 393, // jgo-CM
|
||||
0x20200000: 394, // jmc
|
||||
0x2020012e: 395, // jmc-TZ
|
||||
0x20600000: 396, // jv
|
||||
0x20800000: 397, // ka
|
||||
0x2080007c: 398, // ka-GE
|
||||
0x20a00000: 399, // kab
|
||||
0x20a00066: 400, // kab-DZ
|
||||
0x20e00000: 401, // kaj
|
||||
0x20f00000: 402, // kam
|
||||
0x20f000a3: 403, // kam-KE
|
||||
0x21700000: 404, // kcg
|
||||
0x21b00000: 405, // kde
|
||||
0x21b0012e: 406, // kde-TZ
|
||||
0x21f00000: 407, // kea
|
||||
0x21f00059: 408, // kea-CV
|
||||
0x22c00000: 409, // khq
|
||||
0x22c000c2: 410, // khq-ML
|
||||
0x23100000: 411, // ki
|
||||
0x231000a3: 412, // ki-KE
|
||||
0x23a00000: 413, // kk
|
||||
0x23a000ad: 414, // kk-KZ
|
||||
0x23c00000: 415, // kkj
|
||||
0x23c00051: 416, // kkj-CM
|
||||
0x23d00000: 417, // kl
|
||||
0x23d00081: 418, // kl-GL
|
||||
0x23e00000: 419, // kln
|
||||
0x23e000a3: 420, // kln-KE
|
||||
0x24200000: 421, // km
|
||||
0x242000a5: 422, // km-KH
|
||||
0x24900000: 423, // kn
|
||||
0x24900098: 424, // kn-IN
|
||||
0x24b00000: 425, // ko
|
||||
0x24b000a9: 426, // ko-KP
|
||||
0x24b000aa: 427, // ko-KR
|
||||
0x24d00000: 428, // kok
|
||||
0x24d00098: 429, // kok-IN
|
||||
0x26100000: 430, // ks
|
||||
0x26100098: 431, // ks-IN
|
||||
0x26200000: 432, // ksb
|
||||
0x2620012e: 433, // ksb-TZ
|
||||
0x26400000: 434, // ksf
|
||||
0x26400051: 435, // ksf-CM
|
||||
0x26500000: 436, // ksh
|
||||
0x2650005f: 437, // ksh-DE
|
||||
0x26b00000: 438, // ku
|
||||
0x27800000: 439, // kw
|
||||
0x2780007a: 440, // kw-GB
|
||||
0x28100000: 441, // ky
|
||||
0x281000a4: 442, // ky-KG
|
||||
0x28800000: 443, // lag
|
||||
0x2880012e: 444, // lag-TZ
|
||||
0x28c00000: 445, // lb
|
||||
0x28c000b6: 446, // lb-LU
|
||||
0x29a00000: 447, // lg
|
||||
0x29a00130: 448, // lg-UG
|
||||
0x2a600000: 449, // lkt
|
||||
0x2a600134: 450, // lkt-US
|
||||
0x2ac00000: 451, // ln
|
||||
0x2ac00029: 452, // ln-AO
|
||||
0x2ac0004a: 453, // ln-CD
|
||||
0x2ac0004b: 454, // ln-CF
|
||||
0x2ac0004c: 455, // ln-CG
|
||||
0x2af00000: 456, // lo
|
||||
0x2af000ae: 457, // lo-LA
|
||||
0x2b600000: 458, // lrc
|
||||
0x2b60009a: 459, // lrc-IQ
|
||||
0x2b60009b: 460, // lrc-IR
|
||||
0x2b700000: 461, // lt
|
||||
0x2b7000b5: 462, // lt-LT
|
||||
0x2b900000: 463, // lu
|
||||
0x2b90004a: 464, // lu-CD
|
||||
0x2bb00000: 465, // luo
|
||||
0x2bb000a3: 466, // luo-KE
|
||||
0x2bc00000: 467, // luy
|
||||
0x2bc000a3: 468, // luy-KE
|
||||
0x2be00000: 469, // lv
|
||||
0x2be000b7: 470, // lv-LV
|
||||
0x2c800000: 471, // mas
|
||||
0x2c8000a3: 472, // mas-KE
|
||||
0x2c80012e: 473, // mas-TZ
|
||||
0x2e000000: 474, // mer
|
||||
0x2e0000a3: 475, // mer-KE
|
||||
0x2e400000: 476, // mfe
|
||||
0x2e4000cb: 477, // mfe-MU
|
||||
0x2e800000: 478, // mg
|
||||
0x2e8000be: 479, // mg-MG
|
||||
0x2e900000: 480, // mgh
|
||||
0x2e9000d0: 481, // mgh-MZ
|
||||
0x2eb00000: 482, // mgo
|
||||
0x2eb00051: 483, // mgo-CM
|
||||
0x2f600000: 484, // mk
|
||||
0x2f6000c1: 485, // mk-MK
|
||||
0x2fb00000: 486, // ml
|
||||
0x2fb00098: 487, // ml-IN
|
||||
0x30200000: 488, // mn
|
||||
0x302000c4: 489, // mn-MN
|
||||
0x31200000: 490, // mr
|
||||
0x31200098: 491, // mr-IN
|
||||
0x31600000: 492, // ms
|
||||
0x3160003d: 493, // ms-BN
|
||||
0x316000cf: 494, // ms-MY
|
||||
0x3160010c: 495, // ms-SG
|
||||
0x31700000: 496, // mt
|
||||
0x317000ca: 497, // mt-MT
|
||||
0x31c00000: 498, // mua
|
||||
0x31c00051: 499, // mua-CM
|
||||
0x32800000: 500, // my
|
||||
0x328000c3: 501, // my-MM
|
||||
0x33100000: 502, // mzn
|
||||
0x3310009b: 503, // mzn-IR
|
||||
0x33800000: 504, // nah
|
||||
0x33c00000: 505, // naq
|
||||
0x33c000d1: 506, // naq-NA
|
||||
0x33e00000: 507, // nb
|
||||
0x33e000d9: 508, // nb-NO
|
||||
0x33e0010f: 509, // nb-SJ
|
||||
0x34500000: 510, // nd
|
||||
0x34500163: 511, // nd-ZW
|
||||
0x34700000: 512, // nds
|
||||
0x3470005f: 513, // nds-DE
|
||||
0x347000d8: 514, // nds-NL
|
||||
0x34800000: 515, // ne
|
||||
0x34800098: 516, // ne-IN
|
||||
0x348000da: 517, // ne-NP
|
||||
0x35e00000: 518, // nl
|
||||
0x35e0002f: 519, // nl-AW
|
||||
0x35e00035: 520, // nl-BE
|
||||
0x35e0003f: 521, // nl-BQ
|
||||
0x35e0005a: 522, // nl-CW
|
||||
0x35e000d8: 523, // nl-NL
|
||||
0x35e00115: 524, // nl-SR
|
||||
0x35e0011a: 525, // nl-SX
|
||||
0x35f00000: 526, // nmg
|
||||
0x35f00051: 527, // nmg-CM
|
||||
0x36100000: 528, // nn
|
||||
0x361000d9: 529, // nn-NO
|
||||
0x36300000: 530, // nnh
|
||||
0x36300051: 531, // nnh-CM
|
||||
0x36600000: 532, // no
|
||||
0x36c00000: 533, // nqo
|
||||
0x36d00000: 534, // nr
|
||||
0x37100000: 535, // nso
|
||||
0x37700000: 536, // nus
|
||||
0x37700116: 537, // nus-SS
|
||||
0x37e00000: 538, // ny
|
||||
0x38000000: 539, // nyn
|
||||
0x38000130: 540, // nyn-UG
|
||||
0x38700000: 541, // om
|
||||
0x3870006e: 542, // om-ET
|
||||
0x387000a3: 543, // om-KE
|
||||
0x38c00000: 544, // or
|
||||
0x38c00098: 545, // or-IN
|
||||
0x38f00000: 546, // os
|
||||
0x38f0007c: 547, // os-GE
|
||||
0x38f00105: 548, // os-RU
|
||||
0x39400000: 549, // pa
|
||||
0x39405000: 550, // pa-Arab
|
||||
0x394050e7: 551, // pa-Arab-PK
|
||||
0x3942f000: 552, // pa-Guru
|
||||
0x3942f098: 553, // pa-Guru-IN
|
||||
0x39800000: 554, // pap
|
||||
0x3aa00000: 555, // pl
|
||||
0x3aa000e8: 556, // pl-PL
|
||||
0x3b400000: 557, // prg
|
||||
0x3b400001: 558, // prg-001
|
||||
0x3b500000: 559, // ps
|
||||
0x3b500023: 560, // ps-AF
|
||||
0x3b700000: 561, // pt
|
||||
0x3b700029: 562, // pt-AO
|
||||
0x3b700040: 563, // pt-BR
|
||||
0x3b70004d: 564, // pt-CH
|
||||
0x3b700059: 565, // pt-CV
|
||||
0x3b700085: 566, // pt-GQ
|
||||
0x3b70008a: 567, // pt-GW
|
||||
0x3b7000b6: 568, // pt-LU
|
||||
0x3b7000c5: 569, // pt-MO
|
||||
0x3b7000d0: 570, // pt-MZ
|
||||
0x3b7000ed: 571, // pt-PT
|
||||
0x3b700117: 572, // pt-ST
|
||||
0x3b700125: 573, // pt-TL
|
||||
0x3bb00000: 574, // qu
|
||||
0x3bb0003e: 575, // qu-BO
|
||||
0x3bb00068: 576, // qu-EC
|
||||
0x3bb000e3: 577, // qu-PE
|
||||
0x3cb00000: 578, // rm
|
||||
0x3cb0004d: 579, // rm-CH
|
||||
0x3d000000: 580, // rn
|
||||
0x3d000039: 581, // rn-BI
|
||||
0x3d300000: 582, // ro
|
||||
0x3d3000bb: 583, // ro-MD
|
||||
0x3d300103: 584, // ro-RO
|
||||
0x3d500000: 585, // rof
|
||||
0x3d50012e: 586, // rof-TZ
|
||||
0x3d900000: 587, // ru
|
||||
0x3d900046: 588, // ru-BY
|
||||
0x3d9000a4: 589, // ru-KG
|
||||
0x3d9000ad: 590, // ru-KZ
|
||||
0x3d9000bb: 591, // ru-MD
|
||||
0x3d900105: 592, // ru-RU
|
||||
0x3d90012f: 593, // ru-UA
|
||||
0x3dc00000: 594, // rw
|
||||
0x3dc00106: 595, // rw-RW
|
||||
0x3dd00000: 596, // rwk
|
||||
0x3dd0012e: 597, // rwk-TZ
|
||||
0x3e200000: 598, // sah
|
||||
0x3e200105: 599, // sah-RU
|
||||
0x3e300000: 600, // saq
|
||||
0x3e3000a3: 601, // saq-KE
|
||||
0x3e900000: 602, // sbp
|
||||
0x3e90012e: 603, // sbp-TZ
|
||||
0x3f200000: 604, // sdh
|
||||
0x3f300000: 605, // se
|
||||
0x3f300071: 606, // se-FI
|
||||
0x3f3000d9: 607, // se-NO
|
||||
0x3f30010b: 608, // se-SE
|
||||
0x3f500000: 609, // seh
|
||||
0x3f5000d0: 610, // seh-MZ
|
||||
0x3f700000: 611, // ses
|
||||
0x3f7000c2: 612, // ses-ML
|
||||
0x3f800000: 613, // sg
|
||||
0x3f80004b: 614, // sg-CF
|
||||
0x3fe00000: 615, // shi
|
||||
0x3fe52000: 616, // shi-Latn
|
||||
0x3fe520b9: 617, // shi-Latn-MA
|
||||
0x3fed2000: 618, // shi-Tfng
|
||||
0x3fed20b9: 619, // shi-Tfng-MA
|
||||
0x40200000: 620, // si
|
||||
0x402000b2: 621, // si-LK
|
||||
0x40800000: 622, // sk
|
||||
0x40800110: 623, // sk-SK
|
||||
0x40c00000: 624, // sl
|
||||
0x40c0010e: 625, // sl-SI
|
||||
0x41200000: 626, // sma
|
||||
0x41300000: 627, // smi
|
||||
0x41400000: 628, // smj
|
||||
0x41500000: 629, // smn
|
||||
0x41500071: 630, // smn-FI
|
||||
0x41800000: 631, // sms
|
||||
0x41900000: 632, // sn
|
||||
0x41900163: 633, // sn-ZW
|
||||
0x41f00000: 634, // so
|
||||
0x41f00061: 635, // so-DJ
|
||||
0x41f0006e: 636, // so-ET
|
||||
0x41f000a3: 637, // so-KE
|
||||
0x41f00114: 638, // so-SO
|
||||
0x42700000: 639, // sq
|
||||
0x42700026: 640, // sq-AL
|
||||
0x427000c1: 641, // sq-MK
|
||||
0x4270014c: 642, // sq-XK
|
||||
0x42800000: 643, // sr
|
||||
0x4281e000: 644, // sr-Cyrl
|
||||
0x4281e032: 645, // sr-Cyrl-BA
|
||||
0x4281e0bc: 646, // sr-Cyrl-ME
|
||||
0x4281e104: 647, // sr-Cyrl-RS
|
||||
0x4281e14c: 648, // sr-Cyrl-XK
|
||||
0x42852000: 649, // sr-Latn
|
||||
0x42852032: 650, // sr-Latn-BA
|
||||
0x428520bc: 651, // sr-Latn-ME
|
||||
0x42852104: 652, // sr-Latn-RS
|
||||
0x4285214c: 653, // sr-Latn-XK
|
||||
0x42d00000: 654, // ss
|
||||
0x43000000: 655, // ssy
|
||||
0x43100000: 656, // st
|
||||
0x43a00000: 657, // sv
|
||||
0x43a00030: 658, // sv-AX
|
||||
0x43a00071: 659, // sv-FI
|
||||
0x43a0010b: 660, // sv-SE
|
||||
0x43b00000: 661, // sw
|
||||
0x43b0004a: 662, // sw-CD
|
||||
0x43b000a3: 663, // sw-KE
|
||||
0x43b0012e: 664, // sw-TZ
|
||||
0x43b00130: 665, // sw-UG
|
||||
0x44400000: 666, // syr
|
||||
0x44600000: 667, // ta
|
||||
0x44600098: 668, // ta-IN
|
||||
0x446000b2: 669, // ta-LK
|
||||
0x446000cf: 670, // ta-MY
|
||||
0x4460010c: 671, // ta-SG
|
||||
0x45700000: 672, // te
|
||||
0x45700098: 673, // te-IN
|
||||
0x45a00000: 674, // teo
|
||||
0x45a000a3: 675, // teo-KE
|
||||
0x45a00130: 676, // teo-UG
|
||||
0x46100000: 677, // th
|
||||
0x46100122: 678, // th-TH
|
||||
0x46500000: 679, // ti
|
||||
0x4650006c: 680, // ti-ER
|
||||
0x4650006e: 681, // ti-ET
|
||||
0x46700000: 682, // tig
|
||||
0x46c00000: 683, // tk
|
||||
0x46c00126: 684, // tk-TM
|
||||
0x47600000: 685, // tn
|
||||
0x47800000: 686, // to
|
||||
0x47800128: 687, // to-TO
|
||||
0x48000000: 688, // tr
|
||||
0x4800005c: 689, // tr-CY
|
||||
0x4800012a: 690, // tr-TR
|
||||
0x48400000: 691, // ts
|
||||
0x49a00000: 692, // twq
|
||||
0x49a000d3: 693, // twq-NE
|
||||
0x49f00000: 694, // tzm
|
||||
0x49f000b9: 695, // tzm-MA
|
||||
0x4a200000: 696, // ug
|
||||
0x4a200052: 697, // ug-CN
|
||||
0x4a400000: 698, // uk
|
||||
0x4a40012f: 699, // uk-UA
|
||||
0x4aa00000: 700, // ur
|
||||
0x4aa00098: 701, // ur-IN
|
||||
0x4aa000e7: 702, // ur-PK
|
||||
0x4b200000: 703, // uz
|
||||
0x4b205000: 704, // uz-Arab
|
||||
0x4b205023: 705, // uz-Arab-AF
|
||||
0x4b21e000: 706, // uz-Cyrl
|
||||
0x4b21e136: 707, // uz-Cyrl-UZ
|
||||
0x4b252000: 708, // uz-Latn
|
||||
0x4b252136: 709, // uz-Latn-UZ
|
||||
0x4b400000: 710, // vai
|
||||
0x4b452000: 711, // vai-Latn
|
||||
0x4b4520b3: 712, // vai-Latn-LR
|
||||
0x4b4d9000: 713, // vai-Vaii
|
||||
0x4b4d90b3: 714, // vai-Vaii-LR
|
||||
0x4b600000: 715, // ve
|
||||
0x4b900000: 716, // vi
|
||||
0x4b90013d: 717, // vi-VN
|
||||
0x4bf00000: 718, // vo
|
||||
0x4bf00001: 719, // vo-001
|
||||
0x4c200000: 720, // vun
|
||||
0x4c20012e: 721, // vun-TZ
|
||||
0x4c400000: 722, // wa
|
||||
0x4c500000: 723, // wae
|
||||
0x4c50004d: 724, // wae-CH
|
||||
0x4db00000: 725, // wo
|
||||
0x4e800000: 726, // xh
|
||||
0x4f100000: 727, // xog
|
||||
0x4f100130: 728, // xog-UG
|
||||
0x4ff00000: 729, // yav
|
||||
0x4ff00051: 730, // yav-CM
|
||||
0x50800000: 731, // yi
|
||||
0x50800001: 732, // yi-001
|
||||
0x50e00000: 733, // yo
|
||||
0x50e0003a: 734, // yo-BJ
|
||||
0x50e000d5: 735, // yo-NG
|
||||
0x51500000: 736, // yue
|
||||
0x5150008c: 737, // yue-HK
|
||||
0x51e00000: 738, // zgh
|
||||
0x51e000b9: 739, // zgh-MA
|
||||
0x51f00000: 740, // zh
|
||||
0x51f34000: 741, // zh-Hans
|
||||
0x51f34052: 742, // zh-Hans-CN
|
||||
0x51f3408c: 743, // zh-Hans-HK
|
||||
0x51f340c5: 744, // zh-Hans-MO
|
||||
0x51f3410c: 745, // zh-Hans-SG
|
||||
0x51f35000: 746, // zh-Hant
|
||||
0x51f3508c: 747, // zh-Hant-HK
|
||||
0x51f350c5: 748, // zh-Hant-MO
|
||||
0x51f3512d: 749, // zh-Hant-TW
|
||||
0x52400000: 750, // zu
|
||||
0x52400160: 751, // zu-ZA
|
||||
}
|
||||
|
||||
// Total table size 4580 bytes (4KiB); checksum: A7F72A2A
|
975
vendor/golang.org/x/text/language/language.go
generated
vendored
975
vendor/golang.org/x/text/language/language.go
generated
vendored
|
@ -1,975 +0,0 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run maketables.go gen_common.go -output tables.go
|
||||
//go:generate go run gen_index.go
|
||||
|
||||
// Package language implements BCP 47 language tags and related functionality.
|
||||
//
|
||||
// The Tag type, which is used to represent languages, is agnostic to the
|
||||
// meaning of its subtags. Tags are not fully canonicalized to preserve
|
||||
// information that may be valuable in certain contexts. As a consequence, two
|
||||
// different tags may represent identical languages.
|
||||
//
|
||||
// Initializing language- or locale-specific components usually consists of
|
||||
// two steps. The first step is to select a display language based on the
|
||||
// preferred languages of the user and the languages supported by an application.
|
||||
// The second step is to create the language-specific services based on
|
||||
// this selection. Each is discussed in more detail below.
|
||||
//
|
||||
// Matching preferred against supported languages
|
||||
//
|
||||
// An application may support various languages. This list is typically limited
|
||||
// by the languages for which there exist translations of the user interface.
|
||||
// Similarly, a user may provide a list of preferred languages which is limited
|
||||
// by the languages understood by this user.
|
||||
// An application should use a Matcher to find the best supported language based
|
||||
// on the user's preferred list.
|
||||
// Matchers are aware of the intricacies of equivalence between languages.
|
||||
// The default Matcher implementation takes into account things such as
|
||||
// deprecated subtags, legacy tags, and mutual intelligibility between scripts
|
||||
// and languages.
|
||||
//
|
||||
// A Matcher for English, Australian English, Danish, and standard Mandarin can
|
||||
// be defined as follows:
|
||||
//
|
||||
// var matcher = language.NewMatcher([]language.Tag{
|
||||
// language.English, // The first language is used as fallback.
|
||||
// language.MustParse("en-AU"),
|
||||
// language.Danish,
|
||||
// language.Chinese,
|
||||
// })
|
||||
//
|
||||
// The following code selects the best match for someone speaking Spanish and
|
||||
// Norwegian:
|
||||
//
|
||||
// preferred := []language.Tag{ language.Spanish, language.Norwegian }
|
||||
// tag, _, _ := matcher.Match(preferred...)
|
||||
//
|
||||
// In this case, the best match is Danish, as Danish is sufficiently a match to
|
||||
// Norwegian to not have to fall back to the default.
|
||||
// See ParseAcceptLanguage on how to handle the Accept-Language HTTP header.
|
||||
//
|
||||
// Selecting language-specific services
|
||||
//
|
||||
// One should always use the Tag returned by the Matcher to create an instance
|
||||
// of any of the language-specific services provided by the text repository.
|
||||
// This prevents the mixing of languages, such as having a different language for
|
||||
// messages and display names, as well as improper casing or sorting order for
|
||||
// the selected language.
|
||||
// Using the returned Tag also allows user-defined settings, such as collation
|
||||
// order or numbering system to be transparently passed as options.
|
||||
//
|
||||
// If you have language-specific data in your application, however, it will in
|
||||
// most cases suffice to use the index returned by the matcher to identify
|
||||
// the user language.
|
||||
// The following loop provides an alternative in case this is not sufficient:
|
||||
//
|
||||
// supported := map[language.Tag]data{
|
||||
// language.English: enData,
|
||||
// language.MustParse("en-AU"): enAUData,
|
||||
// language.Danish: daData,
|
||||
// language.Chinese: zhData,
|
||||
// }
|
||||
// tag, _, _ := matcher.Match(preferred...)
|
||||
// for ; tag != language.Und; tag = tag.Parent() {
|
||||
// if v, ok := supported[tag]; ok {
|
||||
// return v
|
||||
// }
|
||||
// }
|
||||
// return enData // should not reach here
|
||||
//
|
||||
// Repeatedly taking the Parent of the tag returned by Match will eventually
|
||||
// match one of the tags used to initialize the Matcher.
|
||||
//
|
||||
// Canonicalization
|
||||
//
|
||||
// By default, only legacy and deprecated tags are converted into their
|
||||
// canonical equivalent. All other information is preserved. This approach makes
|
||||
// the confidence scores more accurate and allows matchers to distinguish
|
||||
// between variants that are otherwise lost.
|
||||
//
|
||||
// As a consequence, two tags that should be treated as identical according to
|
||||
// BCP 47 or CLDR, like "en-Latn" and "en", will be represented differently. The
|
||||
// Matchers will handle such distinctions, though, and are aware of the
|
||||
// equivalence relations. The CanonType type can be used to alter the
|
||||
// canonicalization form.
|
||||
//
|
||||
// References
|
||||
//
|
||||
// BCP 47 - Tags for Identifying Languages
|
||||
// http://tools.ietf.org/html/bcp47
|
||||
package language // import "golang.org/x/text/language"
|
||||
|
||||
// TODO: Remove above NOTE after:
|
||||
// - verifying that tables are dropped correctly (most notably matcher tables).
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Size limits for BCP 47 tag strings and their parts.
const (
|
||||
// maxCoreSize is the maximum size of a BCP 47 tag without variants and
|
||||
// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.
|
||||
maxCoreSize = 12
|
||||
|
||||
// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
|
||||
// is large enough to hold at least 99% of the BCP 47 tags.
|
||||
max99thPercentileSize = 32
|
||||
|
||||
// maxSimpleUExtensionSize is the maximum size of a -u extension with one
|
||||
// key-type pair. Equals len("-u-") + key (2) + dash + max value (8).
|
||||
maxSimpleUExtensionSize = 14
|
||||
)
|
||||
|
||||
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
|
||||
// specific language or locale. All language tag values are guaranteed to be
|
||||
// well-formed.
|
||||
type Tag struct {
|
||||
// lang, region and script hold the IDs of the base language, region and
// script subtags; Raw returns them wrapped as Base, Region and Script.
lang langID
|
||||
region regionID
|
||||
script scriptID
|
||||
pVariant byte // offset in str, includes preceding '-'
|
||||
pExt uint16 // offset of first extension, includes preceding '-'
|
||||
|
||||
// str is the string representation of the Tag. It will only be used if the
|
||||
// tag has variants or extensions.
|
||||
str string
|
||||
}
|
||||
|
||||
// Make is a convenience wrapper for Parse that omits the error.
|
||||
// In case of an error, a sensible default is returned.
|
||||
func Make(s string) Tag {
|
||||
return Default.Make(s)
|
||||
}
|
||||
|
||||
// Make is a convenience wrapper for c.Parse that omits the error.
|
||||
// In case of an error, a sensible default is returned.
|
||||
func (c CanonType) Make(s string) Tag {
|
||||
t, _ := c.Parse(s)
|
||||
return t
|
||||
}
|
||||
|
||||
// Raw returns the raw base language, script and region, without making an
|
||||
// attempt to infer their values.
|
||||
func (t Tag) Raw() (b Base, s Script, r Region) {
|
||||
return Base{t.lang}, Script{t.script}, Region{t.region}
|
||||
}
|
||||
|
||||
// equalTags compares language, script and region subtags only.
|
||||
func (t Tag) equalTags(a Tag) bool {
|
||||
return t.lang == a.lang && t.script == a.script && t.region == a.region
|
||||
}
|
||||
|
||||
// IsRoot returns true if t is equal to language "und".
|
||||
func (t Tag) IsRoot() bool {
|
||||
if int(t.pVariant) < len(t.str) {
|
||||
return false
|
||||
}
|
||||
return t.equalTags(und)
|
||||
}
|
||||
|
||||
// private reports whether the Tag consists solely of a private use tag.
|
||||
func (t Tag) private() bool {
|
||||
return t.str != "" && t.pVariant == 0
|
||||
}
|
||||
|
||||
// CanonType can be used to enable or disable various types of canonicalization.
// It is a bit set: the individual flags below may be OR-ed together.
type CanonType int

const (
	// Replace deprecated base languages with their preferred replacements.
	DeprecatedBase CanonType = 1 << iota
	// Replace deprecated scripts with their preferred replacements.
	DeprecatedScript
	// Replace deprecated regions with their preferred replacements.
	DeprecatedRegion
	// Remove redundant scripts.
	SuppressScript
	// Normalize legacy encodings. This includes legacy languages defined in
	// CLDR as well as bibliographic codes defined in ISO-639.
	Legacy
	// Map the dominant language of a macro language group to the macro language
	// subtag. For example cmn -> zh.
	Macro
	// The CLDR flag should be used if full compatibility with CLDR is required.
	// There are a few cases where language.Tag may differ from CLDR. To follow all
	// of CLDR's suggestions, use All|CLDR.
	CLDR

	// Raw can be used to Compose or Parse without Canonicalization.
	// It is the empty flag set.
	Raw CanonType = 0

	// Replace all deprecated tags with their preferred replacements.
	Deprecated = DeprecatedBase | DeprecatedScript | DeprecatedRegion

	// All canonicalizations recommended by BCP 47.
	BCP47 = Deprecated | SuppressScript

	// All canonicalizations. Note that the CLDR flag is not part of All.
	All = BCP47 | Legacy | Macro

	// Default is the canonicalization used by Parse, Make and Compose. To
	// preserve as much information as possible, canonicalizations that remove
	// potentially valuable information are not included. The Matcher is
	// designed to recognize similar tags that would be the same if
	// they were canonicalized using All.
	Default = Deprecated | Legacy

	// canonLang is the set of flags that can rewrite the base language.
	canonLang = DeprecatedBase | Legacy | Macro

	// TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
)
|
||||
|
||||
// canonicalize returns the canonicalized equivalent of the tag and
|
||||
// whether there was any change.
|
||||
func (t Tag) canonicalize(c CanonType) (Tag, bool) {
|
||||
if c == Raw {
|
||||
return t, false
|
||||
}
|
||||
changed := false
|
||||
if c&SuppressScript != 0 {
|
||||
if t.lang < langNoIndexOffset && uint8(t.script) == suppressScript[t.lang] {
|
||||
t.script = 0
|
||||
changed = true
|
||||
}
|
||||
}
|
||||
if c&canonLang != 0 {
|
||||
for {
|
||||
if l, aliasType := normLang(t.lang); l != t.lang {
|
||||
switch aliasType {
|
||||
case langLegacy:
|
||||
if c&Legacy != 0 {
|
||||
if t.lang == _sh && t.script == 0 {
|
||||
t.script = _Latn
|
||||
}
|
||||
t.lang = l
|
||||
changed = true
|
||||
}
|
||||
case langMacro:
|
||||
if c&Macro != 0 {
|
||||
// We deviate here from CLDR. The mapping "nb" -> "no"
|
||||
// qualifies as a typical Macro language mapping. However,
|
||||
// for legacy reasons, CLDR maps "no", the macro language
|
||||
// code for Norwegian, to the dominant variant "nb". This
|
||||
// change is currently under consideration for CLDR as well.
|
||||
// See http://unicode.org/cldr/trac/ticket/2698 and also
|
||||
// http://unicode.org/cldr/trac/ticket/1790 for some of the
|
||||
// practical implications. TODO: this check could be removed
|
||||
// if CLDR adopts this change.
|
||||
if c&CLDR == 0 || t.lang != _nb {
|
||||
changed = true
|
||||
t.lang = l
|
||||
}
|
||||
}
|
||||
case langDeprecated:
|
||||
if c&DeprecatedBase != 0 {
|
||||
if t.lang == _mo && t.region == 0 {
|
||||
t.region = _MD
|
||||
}
|
||||
t.lang = l
|
||||
changed = true
|
||||
// Other canonicalization types may still apply.
|
||||
continue
|
||||
}
|
||||
}
|
||||
} else if c&Legacy != 0 && t.lang == _no && c&CLDR != 0 {
|
||||
t.lang = _nb
|
||||
changed = true
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
if c&DeprecatedScript != 0 {
|
||||
if t.script == _Qaai {
|
||||
changed = true
|
||||
t.script = _Zinh
|
||||
}
|
||||
}
|
||||
if c&DeprecatedRegion != 0 {
|
||||
if r := normRegion(t.region); r != 0 {
|
||||
changed = true
|
||||
t.region = r
|
||||
}
|
||||
}
|
||||
return t, changed
|
||||
}
|
||||
|
||||
// Canonicalize returns the canonicalized equivalent of the tag.
|
||||
func (c CanonType) Canonicalize(t Tag) (Tag, error) {
|
||||
t, changed := t.canonicalize(c)
|
||||
if changed {
|
||||
t.remakeString()
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// Confidence indicates the level of certainty for a given return value.
// For example, Serbian may be written in Cyrillic or Latin script.
// The confidence level indicates whether a value was explicitly specified,
// whether it is typically the only possible value, or whether there is
// an ambiguity.
type Confidence int

const (
	No    Confidence = iota // full confidence that there was no match
	Low                     // most likely value picked out of a set of alternatives
	High                    // value is generally assumed to be the correct match
	Exact                   // exact match or explicitly specified value
)

// confName holds the display names of the Confidence constants, indexed by
// their numeric value.
var confName = []string{"No", "Low", "High", "Exact"}

// String returns the name of the confidence level ("No", "Low", "High" or
// "Exact"). Values outside the declared range are rendered as
// "Confidence(n)" instead of panicking with an out-of-range index.
func (c Confidence) String() string {
	if c < 0 || int(c) >= len(confName) {
		return fmt.Sprintf("Confidence(%d)", int(c))
	}
	return confName[c]
}
|
||||
|
||||
// remakeString is used to update t.str in case lang, script or region changed.
|
||||
// It is assumed that pExt and pVariant still point to the start of the
|
||||
// respective parts.
|
||||
func (t *Tag) remakeString() {
|
||||
if t.str == "" {
|
||||
return
|
||||
}
|
||||
extra := t.str[t.pVariant:]
|
||||
if t.pVariant > 0 {
|
||||
extra = extra[1:]
|
||||
}
|
||||
if t.equalTags(und) && strings.HasPrefix(extra, "x-") {
|
||||
t.str = extra
|
||||
t.pVariant = 0
|
||||
t.pExt = 0
|
||||
return
|
||||
}
|
||||
var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
|
||||
b := buf[:t.genCoreBytes(buf[:])]
|
||||
if extra != "" {
|
||||
diff := len(b) - int(t.pVariant)
|
||||
b = append(b, '-')
|
||||
b = append(b, extra...)
|
||||
t.pVariant = uint8(int(t.pVariant) + diff)
|
||||
t.pExt = uint16(int(t.pExt) + diff)
|
||||
} else {
|
||||
t.pVariant = uint8(len(b))
|
||||
t.pExt = uint16(len(b))
|
||||
}
|
||||
t.str = string(b)
|
||||
}
|
||||
|
||||
// genCoreBytes writes a string for the base languages, script and region tags
|
||||
// to the given buffer and returns the number of bytes written. It will never
|
||||
// write more than maxCoreSize bytes.
|
||||
func (t *Tag) genCoreBytes(buf []byte) int {
|
||||
n := t.lang.stringToBuf(buf[:])
|
||||
if t.script != 0 {
|
||||
n += copy(buf[n:], "-")
|
||||
n += copy(buf[n:], t.script.String())
|
||||
}
|
||||
if t.region != 0 {
|
||||
n += copy(buf[n:], "-")
|
||||
n += copy(buf[n:], t.region.String())
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// String returns the canonical string representation of the language tag.
|
||||
func (t Tag) String() string {
|
||||
if t.str != "" {
|
||||
return t.str
|
||||
}
|
||||
if t.script == 0 && t.region == 0 {
|
||||
return t.lang.String()
|
||||
}
|
||||
buf := [maxCoreSize]byte{}
|
||||
return string(buf[:t.genCoreBytes(buf[:])])
|
||||
}
|
||||
|
||||
// Base returns the base language of the language tag. If the base language is
|
||||
// unspecified, an attempt will be made to infer it from the context.
|
||||
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||
func (t Tag) Base() (Base, Confidence) {
|
||||
if t.lang != 0 {
|
||||
return Base{t.lang}, Exact
|
||||
}
|
||||
c := High
|
||||
if t.script == 0 && !(Region{t.region}).IsCountry() {
|
||||
c = Low
|
||||
}
|
||||
if tag, err := addTags(t); err == nil && tag.lang != 0 {
|
||||
return Base{tag.lang}, c
|
||||
}
|
||||
return Base{0}, No
|
||||
}
|
||||
|
||||
// Script infers the script for the language tag. If it was not explicitly given, it will infer
|
||||
// a most likely candidate.
|
||||
// If more than one script is commonly used for a language, the most likely one
|
||||
// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
|
||||
// for Serbian.
|
||||
// If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
|
||||
// as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
|
||||
// common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
|
||||
// See http://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
|
||||
// unknown value in CLDR. (Zzzz, Exact) is returned if Zzzz was explicitly specified.
|
||||
// Note that an inferred script is never guaranteed to be the correct one. Latin is
|
||||
// almost exclusively used for Afrikaans, but Arabic has been used for some texts
|
||||
// in the past. Also, the script that is commonly used may change over time.
|
||||
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||
func (t Tag) Script() (Script, Confidence) {
|
||||
if t.script != 0 {
|
||||
return Script{t.script}, Exact
|
||||
}
|
||||
sc, c := scriptID(_Zzzz), No
|
||||
if t.lang < langNoIndexOffset {
|
||||
if scr := scriptID(suppressScript[t.lang]); scr != 0 {
|
||||
// Note: it is not always the case that a language with a suppress
|
||||
// script value is only written in one script (e.g. kk, ms, pa).
|
||||
if t.region == 0 {
|
||||
return Script{scriptID(scr)}, High
|
||||
}
|
||||
sc, c = scr, High
|
||||
}
|
||||
}
|
||||
if tag, err := addTags(t); err == nil {
|
||||
if tag.script != sc {
|
||||
sc, c = tag.script, Low
|
||||
}
|
||||
} else {
|
||||
t, _ = (Deprecated | Macro).Canonicalize(t)
|
||||
if tag, err := addTags(t); err == nil && tag.script != sc {
|
||||
sc, c = tag.script, Low
|
||||
}
|
||||
}
|
||||
return Script{sc}, c
|
||||
}
|
||||
|
||||
// Region returns the region for the language tag. If it was not explicitly given, it will
|
||||
// infer a most likely candidate from the context.
|
||||
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||
func (t Tag) Region() (Region, Confidence) {
|
||||
if t.region != 0 {
|
||||
return Region{t.region}, Exact
|
||||
}
|
||||
if t, err := addTags(t); err == nil {
|
||||
return Region{t.region}, Low // TODO: differentiate between high and low.
|
||||
}
|
||||
t, _ = (Deprecated | Macro).Canonicalize(t)
|
||||
if tag, err := addTags(t); err == nil {
|
||||
return Region{tag.region}, Low
|
||||
}
|
||||
return Region{_ZZ}, No // TODO: return world instead of undetermined?
|
||||
}
|
||||
|
||||
// Variant returns the variants specified explicitly for this language tag.
|
||||
// or nil if no variant was specified.
|
||||
func (t Tag) Variants() []Variant {
|
||||
v := []Variant{}
|
||||
if int(t.pVariant) < int(t.pExt) {
|
||||
for x, str := "", t.str[t.pVariant:t.pExt]; str != ""; {
|
||||
x, str = nextToken(str)
|
||||
v = append(v, Variant{x})
|
||||
}
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
|
||||
// specific language are substituted with fields from the parent language.
|
||||
// The parent for a language may change for newer versions of CLDR.
|
||||
func (t Tag) Parent() Tag {
|
||||
if t.str != "" {
|
||||
// Strip the variants and extensions.
|
||||
t, _ = Raw.Compose(t.Raw())
|
||||
if t.region == 0 && t.script != 0 && t.lang != 0 {
|
||||
base, _ := addTags(Tag{lang: t.lang})
|
||||
if base.script == t.script {
|
||||
return Tag{lang: t.lang}
|
||||
}
|
||||
}
|
||||
return t
|
||||
}
|
||||
if t.lang != 0 {
|
||||
if t.region != 0 {
|
||||
maxScript := t.script
|
||||
if maxScript == 0 {
|
||||
max, _ := addTags(t)
|
||||
maxScript = max.script
|
||||
}
|
||||
|
||||
for i := range parents {
|
||||
if langID(parents[i].lang) == t.lang && scriptID(parents[i].maxScript) == maxScript {
|
||||
for _, r := range parents[i].fromRegion {
|
||||
if regionID(r) == t.region {
|
||||
return Tag{
|
||||
lang: t.lang,
|
||||
script: scriptID(parents[i].script),
|
||||
region: regionID(parents[i].toRegion),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Strip the script if it is the default one.
|
||||
base, _ := addTags(Tag{lang: t.lang})
|
||||
if base.script != maxScript {
|
||||
return Tag{lang: t.lang, script: maxScript}
|
||||
}
|
||||
return Tag{lang: t.lang}
|
||||
} else if t.script != 0 {
|
||||
// The parent for an base-script pair with a non-default script is
|
||||
// "und" instead of the base language.
|
||||
base, _ := addTags(Tag{lang: t.lang})
|
||||
if base.script != t.script {
|
||||
return und
|
||||
}
|
||||
return Tag{lang: t.lang}
|
||||
}
|
||||
}
|
||||
return und
|
||||
}
|
||||
|
||||
// nextToken splits off the first token of s. s is expected to start with a
// one-byte separator (a '-' in practice), which is skipped. The token runs
// up to, but not including, the next '-'; tail is the remainder of s
// starting at that '-', or "" if there is none.
//
// An empty s yields ("", "") rather than panicking.
func nextToken(s string) (t, tail string) {
	if s == "" {
		return "", ""
	}
	rest := s[1:] // drop the leading separator
	p := strings.IndexByte(rest, '-')
	if p < 0 {
		return rest, ""
	}
	return rest[:p], rest[p:]
}
|
||||
|
||||
// Extension is a single BCP 47 extension. The zero value is the empty
// extension.
type Extension struct {
	// s is the full text of the extension; its first byte is the type
	// (see Type) and its tokens are separated by '-' (see Tokens).
	s string
}

// String returns the string representation of the extension, including the
// type tag.
func (e Extension) String() string {
	return e.s
}
|
||||
|
||||
// ParseExtension parses s as an extension and returns it on success.
|
||||
func ParseExtension(s string) (e Extension, err error) {
|
||||
scan := makeScannerString(s)
|
||||
var end int
|
||||
if n := len(scan.token); n != 1 {
|
||||
return Extension{}, errSyntax
|
||||
}
|
||||
scan.toLower(0, len(scan.b))
|
||||
end = parseExtension(&scan)
|
||||
if end != len(s) {
|
||||
return Extension{}, errSyntax
|
||||
}
|
||||
return Extension{string(scan.b)}, nil
|
||||
}
|
||||
|
||||
// Type returns the one-byte extension type of e. It returns 0 for the zero
|
||||
// exception.
|
||||
func (e Extension) Type() byte {
|
||||
if e.s == "" {
|
||||
return 0
|
||||
}
|
||||
return e.s[0]
|
||||
}
|
||||
|
||||
// Tokens returns the list of tokens of e, obtained by splitting its string
// form on '-'. The first token is therefore the extension type. For the
// zero extension the result is a one-element slice holding the empty
// string (the behavior of strings.Split on "").
func (e Extension) Tokens() []string {
	return strings.Split(e.s, "-")
}
|
||||
|
||||
// Extension returns the extension of type x for tag t. It will return
|
||||
// false for ok if t does not have the requested extension. The returned
|
||||
// extension will be invalid in this case.
|
||||
func (t Tag) Extension(x byte) (ext Extension, ok bool) {
|
||||
for i := int(t.pExt); i < len(t.str)-1; {
|
||||
var ext string
|
||||
i, ext = getExtension(t.str, i)
|
||||
if ext[0] == x {
|
||||
return Extension{ext}, true
|
||||
}
|
||||
}
|
||||
return Extension{}, false
|
||||
}
|
||||
|
||||
// Extensions returns all extensions of t.
|
||||
func (t Tag) Extensions() []Extension {
|
||||
e := []Extension{}
|
||||
for i := int(t.pExt); i < len(t.str)-1; {
|
||||
var ext string
|
||||
i, ext = getExtension(t.str, i)
|
||||
e = append(e, Extension{ext})
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// TypeForKey returns the type associated with the given key, where key and type
|
||||
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// TypeForKey will traverse the inheritance chain to get the correct value.
|
||||
func (t Tag) TypeForKey(key string) string {
|
||||
if start, end, _ := t.findTypeForKey(key); end != start {
|
||||
return t.str[start:end]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Errors returned by SetTypeForKey.
var (
	// errPrivateUse is returned when the receiver is a private use tag.
	errPrivateUse = errors.New("cannot set a key on a private use tag")
	// errInvalidArguments is returned when the key or value has an
	// unacceptable length.
	errInvalidArguments = errors.New("invalid key or type")
)
|
||||
|
||||
// SetTypeForKey returns a new Tag with the key set to type, where key and type
|
||||
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// An empty value removes an existing pair with the same key.
|
||||
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
|
||||
if t.private() {
|
||||
return t, errPrivateUse
|
||||
}
|
||||
if len(key) != 2 {
|
||||
return t, errInvalidArguments
|
||||
}
|
||||
|
||||
// Remove the setting if value is "".
|
||||
if value == "" {
|
||||
start, end, _ := t.findTypeForKey(key)
|
||||
if start != end {
|
||||
// Remove key tag and leading '-'.
|
||||
start -= 4
|
||||
|
||||
// Remove a possible empty extension.
|
||||
if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' {
|
||||
start -= 2
|
||||
}
|
||||
if start == int(t.pVariant) && end == len(t.str) {
|
||||
t.str = ""
|
||||
t.pVariant, t.pExt = 0, 0
|
||||
} else {
|
||||
t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
|
||||
}
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
if len(value) < 3 || len(value) > 8 {
|
||||
return t, errInvalidArguments
|
||||
}
|
||||
|
||||
var (
|
||||
buf [maxCoreSize + maxSimpleUExtensionSize]byte
|
||||
uStart int // start of the -u extension.
|
||||
)
|
||||
|
||||
// Generate the tag string if needed.
|
||||
if t.str == "" {
|
||||
uStart = t.genCoreBytes(buf[:])
|
||||
buf[uStart] = '-'
|
||||
uStart++
|
||||
}
|
||||
|
||||
// Create new key-type pair and parse it to verify.
|
||||
b := buf[uStart:]
|
||||
copy(b, "u-")
|
||||
copy(b[2:], key)
|
||||
b[4] = '-'
|
||||
b = b[:5+copy(b[5:], value)]
|
||||
scan := makeScanner(b)
|
||||
if parseExtensions(&scan); scan.err != nil {
|
||||
return t, scan.err
|
||||
}
|
||||
|
||||
// Assemble the replacement string.
|
||||
if t.str == "" {
|
||||
t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
|
||||
t.str = string(buf[:uStart+len(b)])
|
||||
} else {
|
||||
s := t.str
|
||||
start, end, hasExt := t.findTypeForKey(key)
|
||||
if start == end {
|
||||
if hasExt {
|
||||
b = b[2:]
|
||||
}
|
||||
t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
|
||||
} else {
|
||||
t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
|
||||
}
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// findTypeForKey returns the start and end position for the type corresponding
// to key or the point at which to insert the key-value pair if the type
// wasn't found (in which case start == end). The hasExt return value reports
// whether an -u extension was present.
// Note: the extensions are typically very small and are likely to contain
// only one key-type pair.
func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
	p := int(t.pExt)
	// Bail out for malformed keys and for tags whose extension offset is 0
	// or at the end of the string (no extensions to search).
	if len(key) != 2 || p == len(t.str) || p == 0 {
		return p, p, false
	}
	s := t.str

	// Find the correct extension.
	for p++; s[p] != 'u'; p++ {
		// An extension type greater than 'u' means there is no -u
		// extension; report the position of its leading '-' as insertion
		// point.
		if s[p] > 'u' {
			p--
			return p, p, false
		}
		if p = nextExtension(s, p); p == len(s) {
			return len(s), len(s), false
		}
	}
	// Proceed to the hyphen following the extension name.
	p++

	// curKey is the key currently being processed.
	curKey := ""

	// Iterate over keys until we get the end of a section.
	for {
		// p points to the hyphen preceding the current token.
		// A token followed by a hyphen three bytes on is two bytes long,
		// which identifies it as a key.
		if p3 := p + 3; s[p3] == '-' {
			// Found a key.
			// Check whether we just processed the key that was requested.
			if curKey == key {
				return start, p, true
			}
			// Set to the next key and continue scanning type tokens.
			curKey = s[p+1 : p3]
			// This comparison only makes sense if keys appear in sorted
			// order; p is then the insertion point for key.
			if curKey > key {
				return p, p, true
			}
			// Start of the type token sequence.
			start = p + 4
			// A type is at least 3 characters long.
			p += 7 // 4 + 3
		} else {
			// Attribute or type, which is at least 3 characters long.
			p += 4
		}
		// p points past the third character of a type or attribute.
		max := p + 5 // maximum length of token plus hyphen.
		if len(s) < max {
			max = len(s)
		}
		// Advance p to the next hyphen or to max, whichever comes first.
		for ; p < max && s[p] != '-'; p++ {
		}
		// Bail if we have exhausted all tokens or if the next token starts
		// a new extension.
		if p == len(s) || s[p+2] == '-' {
			if curKey == key {
				return start, p, true
			}
			return p, p, true
		}
	}
}
|
||||
|
||||
// CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
|
||||
// for which data exists in the text repository. The index will change over time
|
||||
// and should not be stored in persistent storage. Extensions, except for the
|
||||
// 'va' type of the 'u' extension, are ignored. It will return 0, false if no
|
||||
// compact tag exists, where 0 is the index for the root language (Und).
|
||||
func CompactIndex(t Tag) (index int, ok bool) {
|
||||
// TODO: perhaps give more frequent tags a lower index.
|
||||
// TODO: we could make the indexes stable. This will excluded some
|
||||
// possibilities for optimization, so don't do this quite yet.
|
||||
b, s, r := t.Raw()
|
||||
if len(t.str) > 0 {
|
||||
if strings.HasPrefix(t.str, "x-") {
|
||||
// We have no entries for user-defined tags.
|
||||
return 0, false
|
||||
}
|
||||
if uint16(t.pVariant) != t.pExt {
|
||||
// There are no tags with variants and an u-va type.
|
||||
if t.TypeForKey("va") != "" {
|
||||
return 0, false
|
||||
}
|
||||
t, _ = Raw.Compose(b, s, r, t.Variants())
|
||||
} else if _, ok := t.Extension('u'); ok {
|
||||
// Strip all but the 'va' entry.
|
||||
variant := t.TypeForKey("va")
|
||||
t, _ = Raw.Compose(b, s, r)
|
||||
t, _ = t.SetTypeForKey("va", variant)
|
||||
}
|
||||
if len(t.str) > 0 {
|
||||
// We have some variants.
|
||||
for i, s := range specialTags {
|
||||
if s == t {
|
||||
return i + 1, true
|
||||
}
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
}
|
||||
// No variants specified: just compare core components.
|
||||
// The key has the form lllssrrr, where l, s, and r are nibbles for
|
||||
// respectively the langID, scriptID, and regionID.
|
||||
key := uint32(b.langID) << (8 + 12)
|
||||
key |= uint32(s.scriptID) << 12
|
||||
key |= uint32(r.regionID)
|
||||
x, ok := coreTags[key]
|
||||
return int(x), ok
|
||||
}
|
||||
|
||||
// Base is an ISO 639 language code, used for encoding the base language
// of a language tag. It wraps a langID, whose methods it promotes.
type Base struct {
	langID
}
|
||||
|
||||
// ParseBase parses a 2- or 3-letter ISO 639 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown language identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseBase(s string) (Base, error) {
|
||||
if n := len(s); n < 2 || 3 < n {
|
||||
return Base{}, errSyntax
|
||||
}
|
||||
var buf [3]byte
|
||||
l, err := getLangID(buf[:copy(buf[:], s)])
|
||||
return Base{l}, err
|
||||
}
|
||||
|
||||
// Script is a 4-letter ISO 15924 code for representing scripts.
// It is idiomatically represented in title case. It wraps a scriptID,
// whose methods it promotes.
type Script struct {
	scriptID
}
|
||||
|
||||
// ParseScript parses a 4-letter ISO 15924 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown script identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseScript(s string) (Script, error) {
|
||||
if len(s) != 4 {
|
||||
return Script{}, errSyntax
|
||||
}
|
||||
var buf [4]byte
|
||||
sc, err := getScriptID(script, buf[:copy(buf[:], s)])
|
||||
return Script{sc}, err
|
||||
}
|
||||
|
||||
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
// It wraps a regionID, whose methods it promotes.
type Region struct {
	regionID
}
|
||||
|
||||
// EncodeM49 returns the Region for the given UN M.49 code.
|
||||
// It returns an error if r is not a valid code.
|
||||
func EncodeM49(r int) (Region, error) {
|
||||
rid, err := getRegionM49(r)
|
||||
return Region{rid}, err
|
||||
}
|
||||
|
||||
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown region identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseRegion(s string) (Region, error) {
|
||||
if n := len(s); n < 2 || 3 < n {
|
||||
return Region{}, errSyntax
|
||||
}
|
||||
var buf [3]byte
|
||||
r, err := getRegionID(buf[:copy(buf[:], s)])
|
||||
return Region{r}, err
|
||||
}
|
||||
|
||||
// IsCountry returns whether this region is a country or autonomous area. This
|
||||
// includes non-standard definitions from CLDR.
|
||||
func (r Region) IsCountry() bool {
|
||||
if r.regionID == 0 || r.IsGroup() || r.IsPrivateUse() && r.regionID != _XK {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// IsGroup returns whether this region defines a collection of regions. This
|
||||
// includes non-standard definitions from CLDR.
|
||||
func (r Region) IsGroup() bool {
|
||||
if r.regionID == 0 {
|
||||
return false
|
||||
}
|
||||
return int(regionInclusion[r.regionID]) < len(regionContainment)
|
||||
}
|
||||
|
||||
// Contains returns whether Region c is contained by Region r. It returns true
// if c == r. The check is delegated to regionID.contains.
func (r Region) Contains(c Region) bool {
	return r.regionID.contains(c.regionID)
}
|
||||
|
||||
func (r regionID) contains(c regionID) bool {
|
||||
if r == c {
|
||||
return true
|
||||
}
|
||||
g := regionInclusion[r]
|
||||
if g >= nRegionGroups {
|
||||
return false
|
||||
}
|
||||
m := regionContainment[g]
|
||||
|
||||
d := regionInclusion[c]
|
||||
b := regionInclusionBits[d]
|
||||
|
||||
// A contained country may belong to multiple disjoint groups. Matching any
|
||||
// of these indicates containment. If the contained region is a group, it
|
||||
// must strictly be a subset.
|
||||
if d >= nRegionGroups {
|
||||
return b&m != 0
|
||||
}
|
||||
return b&^m == 0
|
||||
}
|
||||
|
||||
// errNoTLD is returned by Region.TLD for regions that have no ccTLD.
var errNoTLD = errors.New("language: region is not a valid ccTLD")
|
||||
|
||||
// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
|
||||
// In all other cases it returns either the region itself or an error.
|
||||
//
|
||||
// This method may return an error for a region for which there exists a
|
||||
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
|
||||
// region will already be canonicalized it was obtained from a Tag that was
|
||||
// obtained using any of the default methods.
|
||||
func (r Region) TLD() (Region, error) {
|
||||
// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
|
||||
// difference between ISO 3166-1 and IANA ccTLD.
|
||||
if r.regionID == _GB {
|
||||
r = Region{_UK}
|
||||
}
|
||||
if (r.typ() & ccTLD) == 0 {
|
||||
return Region{}, errNoTLD
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// Canonicalize returns the region or a possible replacement if the region is
|
||||
// deprecated. It will not return a replacement for deprecated regions that
|
||||
// are split into multiple regions.
|
||||
func (r Region) Canonicalize() Region {
|
||||
if cr := normRegion(r.regionID); cr != 0 {
|
||||
return Region{cr}
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// Variant represents a registered variant of a language as defined by BCP 47.
type Variant struct {
	// variant is the variant subtag text; ParseVariant stores it in lower
	// case.
	variant string
}
|
||||
|
||||
// ParseVariant parses and returns a Variant. An error is returned if s is not
|
||||
// a valid variant.
|
||||
func ParseVariant(s string) (Variant, error) {
|
||||
s = strings.ToLower(s)
|
||||
if _, ok := variantIndex[s]; ok {
|
||||
return Variant{s}, nil
|
||||
}
|
||||
return Variant{}, mkErrInvalid([]byte(s))
|
||||
}
|
||||
|
||||
// String returns the string representation of the variant, i.e. the stored
// subtag text. It is empty for the zero Variant.
func (v Variant) String() string {
	return v.variant
}
|
396
vendor/golang.org/x/text/language/lookup.go
generated
vendored
396
vendor/golang.org/x/text/language/lookup.go
generated
vendored
|
@ -1,396 +0,0 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/text/internal/tag"
|
||||
)
|
||||
|
||||
// findIndex tries to find the given tag in idx and returns a standardized error
|
||||
// if it could not be found.
|
||||
func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
|
||||
if !tag.FixCase(form, key) {
|
||||
return 0, errSyntax
|
||||
}
|
||||
i := idx.Index(key)
|
||||
if i == -1 {
|
||||
return 0, mkErrInvalid(key)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
// searchUint returns the smallest index i for which imap[i] >= key, or
// len(imap) if there is no such index. imap must be sorted in ascending
// order for the binary search to be meaningful.
func searchUint(imap []uint16, key uint16) int {
	atOrAbove := func(i int) bool {
		return !(imap[i] < key)
	}
	return sort.Search(len(imap), atOrAbove)
}
|
||||
|
||||
// langID is the compact numeric identifier of a base language. The value 0
// is rendered as "und"; values at or above langNoIndexOffset encode a
// 3-letter code arithmetically (see strToInt and intToStr) rather than by
// table index.
type langID uint16
|
||||
|
||||
// getLangID returns the langID of s if s is a canonical subtag
|
||||
// or langUnknown if s is not a canonical subtag.
|
||||
func getLangID(s []byte) (langID, error) {
|
||||
if len(s) == 2 {
|
||||
return getLangISO2(s)
|
||||
}
|
||||
return getLangISO3(s)
|
||||
}
|
||||
|
||||
// mapLang returns the mapped langID of id according to mapping m.
|
||||
func normLang(id langID) (langID, langAliasType) {
|
||||
k := sort.Search(len(langAliasMap), func(i int) bool {
|
||||
return langAliasMap[i].from >= uint16(id)
|
||||
})
|
||||
if k < len(langAliasMap) && langAliasMap[k].from == uint16(id) {
|
||||
return langID(langAliasMap[k].to), langAliasTypes[k]
|
||||
}
|
||||
return id, langAliasTypeUnknown
|
||||
}
|
||||
|
||||
// getLangISO2 returns the langID for the given 2-letter ISO language code
|
||||
// or unknownLang if this does not exist.
|
||||
func getLangISO2(s []byte) (langID, error) {
|
||||
if !tag.FixCase("zz", s) {
|
||||
return 0, errSyntax
|
||||
}
|
||||
if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {
|
||||
return langID(i), nil
|
||||
}
|
||||
return 0, mkErrInvalid(s)
|
||||
}
|
||||
|
||||
// base is the radix used by strToInt and intToStr: the number of letters in
// the range 'a'..'z'.
const base = 'z' - 'a' + 1

// strToInt interprets s as a big-endian base-26 number whose digits are the
// letters 'a' (0) through 'z' (25). An empty s yields 0. Bytes outside
// 'a'..'z' are not rejected.
func strToInt(s []byte) uint {
	var v uint
	for _, c := range s {
		v = v*base + uint(c-'a')
	}
	return v
}

// intToStr converts the given integer to the original ASCII string passed
// to strToInt, filling s from its last byte to its first.
// len(s) must match the number of characters obtained.
func intToStr(v uint, s []byte) {
	for i := len(s); i > 0; i-- {
		s[i-1] = 'a' + byte(v%base)
		v /= base
	}
}
|
||||
|
||||
// getLangISO3 returns the langID for the given 3-letter ISO language code
|
||||
// or unknownLang if this does not exist.
|
||||
func getLangISO3(s []byte) (langID, error) {
|
||||
if tag.FixCase("und", s) {
|
||||
// first try to match canonical 3-letter entries
|
||||
for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) {
|
||||
if e := lang.Elem(i); e[3] == 0 && e[2] == s[2] {
|
||||
// We treat "und" as special and always translate it to "unspecified".
|
||||
// Note that ZZ and Zzzz are private use and are not treated as
|
||||
// unspecified by default.
|
||||
id := langID(i)
|
||||
if id == nonCanonicalUnd {
|
||||
return 0, nil
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
}
|
||||
if i := altLangISO3.Index(s); i != -1 {
|
||||
return langID(altLangIndex[altLangISO3.Elem(i)[3]]), nil
|
||||
}
|
||||
n := strToInt(s)
|
||||
if langNoIndex[n/8]&(1<<(n%8)) != 0 {
|
||||
return langID(n) + langNoIndexOffset, nil
|
||||
}
|
||||
// Check for non-canonical uses of ISO3.
|
||||
for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) {
|
||||
if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
|
||||
return langID(i), nil
|
||||
}
|
||||
}
|
||||
return 0, mkErrInvalid(s)
|
||||
}
|
||||
return 0, errSyntax
|
||||
}
|
||||
|
||||
// stringToBuf writes the string to b and returns the number of bytes
|
||||
// written. cap(b) must be >= 3.
|
||||
func (id langID) stringToBuf(b []byte) int {
|
||||
if id >= langNoIndexOffset {
|
||||
intToStr(uint(id)-langNoIndexOffset, b[:3])
|
||||
return 3
|
||||
} else if id == 0 {
|
||||
return copy(b, "und")
|
||||
}
|
||||
l := lang[id<<2:]
|
||||
if l[3] == 0 {
|
||||
return copy(b, l[:3])
|
||||
}
|
||||
return copy(b, l[:2])
|
||||
}
|
||||
|
||||
// String returns the BCP 47 representation of the langID.
|
||||
// Use b as variable name, instead of id, to ensure the variable
|
||||
// used is consistent with that of Base in which this type is embedded.
|
||||
func (b langID) String() string {
|
||||
if b == 0 {
|
||||
return "und"
|
||||
} else if b >= langNoIndexOffset {
|
||||
b -= langNoIndexOffset
|
||||
buf := [3]byte{}
|
||||
intToStr(uint(b), buf[:])
|
||||
return string(buf[:])
|
||||
}
|
||||
l := lang.Elem(int(b))
|
||||
if l[3] == 0 {
|
||||
return l[:3]
|
||||
}
|
||||
return l[:2]
|
||||
}
|
||||
|
||||
// ISO3 returns the ISO 639-3 language code.
|
||||
func (b langID) ISO3() string {
|
||||
if b == 0 || b >= langNoIndexOffset {
|
||||
return b.String()
|
||||
}
|
||||
l := lang.Elem(int(b))
|
||||
if l[3] == 0 {
|
||||
return l[:3]
|
||||
} else if l[2] == 0 {
|
||||
return altLangISO3.Elem(int(l[3]))[:3]
|
||||
}
|
||||
// This allocation will only happen for 3-letter ISO codes
|
||||
// that are non-canonical BCP 47 language identifiers.
|
||||
return l[0:1] + l[2:4]
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether this language code is reserved for private use.
|
||||
func (b langID) IsPrivateUse() bool {
|
||||
return langPrivateStart <= b && b <= langPrivateEnd
|
||||
}
|
||||
|
||||
// regionID is the compact internal identifier of a region. Zero means
// unspecified; values at or above isoRegionOffset index the regionISO table
// (see String and ISO3).
type regionID uint16
|
||||
|
||||
// getRegionID returns the region id for s if s is a valid 2-letter region code
|
||||
// or unknownRegion.
|
||||
func getRegionID(s []byte) (regionID, error) {
|
||||
if len(s) == 3 {
|
||||
if isAlpha(s[0]) {
|
||||
return getRegionISO3(s)
|
||||
}
|
||||
if i, err := strconv.ParseUint(string(s), 10, 10); err == nil {
|
||||
return getRegionM49(int(i))
|
||||
}
|
||||
}
|
||||
return getRegionISO2(s)
|
||||
}
|
||||
|
||||
// getRegionISO2 returns the regionID for the given 2-letter ISO country code
|
||||
// or unknownRegion if this does not exist.
|
||||
func getRegionISO2(s []byte) (regionID, error) {
|
||||
i, err := findIndex(regionISO, s, "ZZ")
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return regionID(i) + isoRegionOffset, nil
|
||||
}
|
||||
|
||||
// getRegionISO3 returns the regionID for the given 3-letter ISO country code
|
||||
// or unknownRegion if this does not exist.
|
||||
func getRegionISO3(s []byte) (regionID, error) {
|
||||
if tag.FixCase("ZZZ", s) {
|
||||
for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) {
|
||||
if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] {
|
||||
return regionID(i) + isoRegionOffset, nil
|
||||
}
|
||||
}
|
||||
for i := 0; i < len(altRegionISO3); i += 3 {
|
||||
if tag.Compare(altRegionISO3[i:i+3], s) == 0 {
|
||||
return regionID(altRegionIDs[i/3]), nil
|
||||
}
|
||||
}
|
||||
return 0, mkErrInvalid(s)
|
||||
}
|
||||
return 0, errSyntax
|
||||
}
|
||||
|
||||
func getRegionM49(n int) (regionID, error) {
|
||||
if 0 < n && n <= 999 {
|
||||
const (
|
||||
searchBits = 7
|
||||
regionBits = 9
|
||||
regionMask = 1<<regionBits - 1
|
||||
)
|
||||
idx := n >> searchBits
|
||||
buf := fromM49[m49Index[idx]:m49Index[idx+1]]
|
||||
val := uint16(n) << regionBits // we rely on bits shifting out
|
||||
i := sort.Search(len(buf), func(i int) bool {
|
||||
return buf[i] >= val
|
||||
})
|
||||
if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {
|
||||
return regionID(r & regionMask), nil
|
||||
}
|
||||
}
|
||||
var e ValueError
|
||||
fmt.Fprint(bytes.NewBuffer([]byte(e.v[:])), n)
|
||||
return 0, e
|
||||
}
|
||||
|
||||
// normRegion returns a region if r is deprecated or 0 otherwise.
|
||||
// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).
|
||||
// TODO: consider mapping split up regions to new most populous one (like CLDR).
|
||||
func normRegion(r regionID) regionID {
|
||||
m := regionOldMap
|
||||
k := sort.Search(len(m), func(i int) bool {
|
||||
return m[i].from >= uint16(r)
|
||||
})
|
||||
if k < len(m) && m[k].from == uint16(r) {
|
||||
return regionID(m[k].to)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Bit flags describing a region. regionID.typ returns the flag byte stored
// for a region in regionTypes.
const (
	// iso3166UserAssigned is the flag tested by regionID.IsPrivateUse.
	iso3166UserAssigned = 1 << iota
	// ccTLD presumably marks regions that have a country-code top-level
	// domain — confirm against the table generator.
	ccTLD
	// bcp47Region presumably marks regions that are valid BCP 47 region
	// subtags — confirm against the table generator.
	bcp47Region
)
|
||||
|
||||
// typ returns the flag byte stored for r in regionTypes.
func (r regionID) typ() byte {
	return regionTypes[r]
}
|
||||
|
||||
// String returns the BCP 47 representation for the region.
|
||||
// It returns "ZZ" for an unspecified region.
|
||||
func (r regionID) String() string {
|
||||
if r < isoRegionOffset {
|
||||
if r == 0 {
|
||||
return "ZZ"
|
||||
}
|
||||
return fmt.Sprintf("%03d", r.M49())
|
||||
}
|
||||
r -= isoRegionOffset
|
||||
return regionISO.Elem(int(r))[:2]
|
||||
}
|
||||
|
||||
// ISO3 returns the 3-letter ISO code of r.
|
||||
// Note that not all regions have a 3-letter ISO code.
|
||||
// In such cases this method returns "ZZZ".
|
||||
func (r regionID) ISO3() string {
|
||||
if r < isoRegionOffset {
|
||||
return "ZZZ"
|
||||
}
|
||||
r -= isoRegionOffset
|
||||
reg := regionISO.Elem(int(r))
|
||||
switch reg[2] {
|
||||
case 0:
|
||||
return altRegionISO3[reg[3]:][:3]
|
||||
case ' ':
|
||||
return "ZZZ"
|
||||
}
|
||||
return reg[0:1] + reg[2:4]
|
||||
}
|
||||
|
||||
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
// is not defined for r. The value is read directly from the m49 table,
// indexed by r.
func (r regionID) M49() int {
	return int(m49[r])
}
|
||||
|
||||
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
|
||||
// may include private-use tags that are assigned by CLDR and used in this
|
||||
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
|
||||
func (r regionID) IsPrivateUse() bool {
|
||||
return r.typ()&iso3166UserAssigned != 0
|
||||
}
|
||||
|
||||
// scriptID is the compact internal identifier of a script. Zero means
// unspecified (rendered as "Zzzz" by String).
type scriptID uint8
|
||||
|
||||
// getScriptID returns the script id for string s. It assumes that s
// is of the format [A-Z][a-z]{3}. The lookup is delegated to findIndex with
// the form "Zzzz"; the index it returns is converted to a scriptID and
// returned together with any error, even when the error is non-nil.
func getScriptID(idx tag.Index, s []byte) (scriptID, error) {
	i, err := findIndex(idx, s, "Zzzz")
	return scriptID(i), err
}
|
||||
|
||||
// String returns the script code in title case.
|
||||
// It returns "Zzzz" for an unspecified script.
|
||||
func (s scriptID) String() string {
|
||||
if s == 0 {
|
||||
return "Zzzz"
|
||||
}
|
||||
return script.Elem(int(s))
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether this script code is reserved for private use.
|
||||
func (s scriptID) IsPrivateUse() bool {
|
||||
return _Qaaa <= s && s <= _Qabx
|
||||
}
|
||||
|
||||
const (
	// maxAltTaglen is the length of the longest key in grandfatheredMap.
	maxAltTaglen = len("en-US-POSIX")
	// maxLen is the size of the fixed-length byte arrays used as keys of
	// grandfatheredMap.
	maxLen = maxAltTaglen
)
|
||||
|
||||
var (
	// grandfatheredMap holds a mapping from legacy and grandfathered tags to
	// their base language or index to more elaborate tag.
	//
	// Keys are lower-cased tags, zero-padded to maxLen bytes. A non-negative
	// value is a language ID. A negative value -k selects the replacement tag
	// altTags[altTagIndex[k-1]:altTagIndex[k]] (see grandfathered).
	grandfatheredMap = map[[maxLen]byte]int16{
		[maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban
		[maxLen]byte{'i', '-', 'a', 'm', 'i'}: _ami, // i-ami
		[maxLen]byte{'i', '-', 'b', 'n', 'n'}: _bnn, // i-bnn
		[maxLen]byte{'i', '-', 'h', 'a', 'k'}: _hak, // i-hak
		[maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}: _tlh, // i-klingon
		[maxLen]byte{'i', '-', 'l', 'u', 'x'}: _lb, // i-lux
		[maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}: _nv, // i-navajo
		[maxLen]byte{'i', '-', 'p', 'w', 'n'}: _pwn, // i-pwn
		[maxLen]byte{'i', '-', 't', 'a', 'o'}: _tao, // i-tao
		[maxLen]byte{'i', '-', 't', 'a', 'y'}: _tay, // i-tay
		[maxLen]byte{'i', '-', 't', 's', 'u'}: _tsu, // i-tsu
		[maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}: _nb, // no-bok
		[maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}: _nn, // no-nyn
		[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}: _sfb, // sgn-BE-FR
		[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}: _vgt, // sgn-BE-NL
		[maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}: _sgg, // sgn-CH-DE
		[maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}: _cmn, // zh-guoyu
		[maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}: _hak, // zh-hakka
		[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan
		[maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}: _hsn, // zh-xiang

		// Grandfathered tags with no modern replacement will be converted as
		// follows:
		[maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish
		[maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}: -2, // en-GB-oed
		[maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}: -3, // i-default
		[maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}: -4, // i-enochian
		[maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}: -5, // i-mingo
		[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}: -6, // zh-min

		// CLDR-specific tag.
		[maxLen]byte{'r', 'o', 'o', 't'}: 0, // root
		[maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX
	}

	// altTagIndex holds the boundaries of the replacement tags packed in
	// altTags: entry k spans altTags[altTagIndex[k]:altTagIndex[k+1]].
	altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102}

	// altTags is the concatenation of the replacement tags selected by the
	// negative values in grandfatheredMap.
	altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix"
)
|
||||
|
||||
func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {
|
||||
if v, ok := grandfatheredMap[s]; ok {
|
||||
if v < 0 {
|
||||
return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true
|
||||
}
|
||||
t.lang = langID(v)
|
||||
return t, true
|
||||
}
|
||||
return t, false
|
||||
}
|
1648
vendor/golang.org/x/text/language/maketables.go
generated
vendored
1648
vendor/golang.org/x/text/language/maketables.go
generated
vendored
File diff suppressed because it is too large
Load diff
841
vendor/golang.org/x/text/language/match.go
generated
vendored
841
vendor/golang.org/x/text/language/match.go
generated
vendored
|
@ -1,841 +0,0 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import "errors"
|
||||
|
||||
// Matcher is the interface that wraps the Match method.
//
// Match returns the best match for any of the given tags, along with
// a unique index associated with the returned tag and a confidence
// score. The tags passed to Match are expected in order of preference.
type Matcher interface {
	Match(t ...Tag) (tag Tag, index int, c Confidence)
}
|
||||
|
||||
// Comprehends reports the confidence score for a speaker of a given language
|
||||
// to being able to comprehend the written form of an alternative language.
|
||||
func Comprehends(speaker, alternative Tag) Confidence {
|
||||
_, _, c := NewMatcher([]Tag{alternative}).Match(speaker)
|
||||
return c
|
||||
}
|
||||
|
||||
// NewMatcher returns a Matcher that matches an ordered list of preferred tags
// against a list of supported tags based on written intelligibility, closeness
// of dialect, equivalence of subtags and various other rules. It is initialized
// with the list of supported tags. The first element is used as the default
// value in case no match is found.
//
// Its Match method matches the first of the given Tags to reach a certain
// confidence threshold. The tags passed to Match should therefore be specified
// in order of preference. Extensions are ignored for matching.
//
// The index returned by the Match method corresponds to the index of the
// matched tag in t, but is augmented with the Unicode extension ('u') of the
// corresponding preferred tag. This allows user locale options to be passed
// transparently.
func NewMatcher(t []Tag) Matcher {
	return newMatcher(t)
}
|
||||
|
||||
func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
|
||||
match, w, c := m.getBest(want...)
|
||||
if match == nil {
|
||||
t = m.default_.tag
|
||||
} else {
|
||||
t, index = match.tag, match.index
|
||||
}
|
||||
// Copy options from the user-provided tag into the result tag. This is hard
|
||||
// to do after the fact, so we do it here.
|
||||
// TODO: consider also adding in variants that are compatible with the
|
||||
// matched language.
|
||||
// TODO: Add back region if it is non-ambiguous? Or create another tag to
|
||||
// preserve the region?
|
||||
if u, ok := w.Extension('u'); ok {
|
||||
t, _ = Raw.Compose(t, u)
|
||||
}
|
||||
return t, index, c
|
||||
}
|
||||
|
||||
// scriptRegionFlags is the type of the flag bits below. addTags tests these
// bits on the flags field of likely-subtag table entries.
type scriptRegionFlags uint8

const (
	// isList marks an entry that does not hold data itself but refers to a
	// run of entries in the corresponding list table.
	isList = 1 << iota
	// scriptInFrom presumably marks entries whose source tag specified the
	// script — confirm against the table generator.
	scriptInFrom
	// regionInFrom presumably marks entries whose source tag specified the
	// region — confirm against the table generator.
	regionInFrom
)
|
||||
|
||||
// setUndefinedLang sets the language of t to id, but only if t has no
// language set yet.
func (t *Tag) setUndefinedLang(id langID) {
	if t.lang == 0 {
		t.lang = id
	}
}
|
||||
|
||||
// setUndefinedScript sets the script of t to id, but only if t has no
// script set yet.
func (t *Tag) setUndefinedScript(id scriptID) {
	if t.script == 0 {
		t.script = id
	}
}
|
||||
|
||||
// setUndefinedRegion sets the region of t to id if t has no region set yet
// or if its current region contains id, so that a containing region is
// replaced by the more specific one.
func (t *Tag) setUndefinedRegion(id regionID) {
	if t.region == 0 || t.region.contains(id) {
		t.region = id
	}
}
|
||||
|
||||
// ErrMissingLikelyTagsData indicates no information was available
|
||||
// to compute likely values of missing tags.
|
||||
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
|
||||
|
||||
// addLikelySubtags sets subtags to their most likely value, given the locale.
|
||||
// In most cases this means setting fields for unknown values, but in some
|
||||
// cases it may alter a value. It returns a ErrMissingLikelyTagsData error
|
||||
// if the given locale cannot be expanded.
|
||||
func (t Tag) addLikelySubtags() (Tag, error) {
|
||||
id, err := addTags(t)
|
||||
if err != nil {
|
||||
return t, err
|
||||
} else if id.equalTags(t) {
|
||||
return t, nil
|
||||
}
|
||||
id.remakeString()
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// specializeRegion attempts to specialize a group region.
|
||||
func specializeRegion(t *Tag) bool {
|
||||
if i := regionInclusion[t.region]; i < nRegionGroups {
|
||||
x := likelyRegionGroup[i]
|
||||
if langID(x.lang) == t.lang && scriptID(x.script) == t.script {
|
||||
t.region = regionID(x.region)
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// addTags fills in the language, script and region of t that are not yet
// set, using the likely-subtags tables, and returns the result. It does not
// rebuild the string form of the tag. Private use tags are returned
// unchanged. It returns ErrMissingLikelyTagsData when the language of t is
// at or above langNoIndexOffset and none of the earlier lookups applied.
func addTags(t Tag) (Tag, error) {
	// We leave private use identifiers alone.
	if t.private() {
		return t, nil
	}
	if t.script != 0 && t.region != 0 {
		if t.lang != 0 {
			// already fully specified
			specializeRegion(&t)
			return t, nil
		}
		// Search matches for und-script-region. Note that for these cases
		// region will never be a group so there is no need to check for this.
		list := likelyRegion[t.region : t.region+1]
		if x := list[0]; x.flags&isList != 0 {
			// For list entries, lang is the start offset into
			// likelyRegionList and script is the number of entries.
			list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
		}
		for _, x := range list {
			// Deviating from the spec. See match_test.go for details.
			if scriptID(x.script) == t.script {
				t.setUndefinedLang(langID(x.lang))
				return t, nil
			}
		}
	}
	if t.lang != 0 {
		// Search matches for lang-script and lang-region, where lang != und.
		if t.lang < langNoIndexOffset {
			x := likelyLang[t.lang]
			if x.flags&isList != 0 {
				// For list entries, region is the start offset into
				// likelyLangList and script is the number of entries.
				list := likelyLangList[x.region : x.region+uint16(x.script)]
				if t.script != 0 {
					for _, x := range list {
						if scriptID(x.script) == t.script && x.flags&scriptInFrom != 0 {
							t.setUndefinedRegion(regionID(x.region))
							return t, nil
						}
					}
				} else if t.region != 0 {
					count := 0
					goodScript := true
					// tt collects the candidate result; t stays untouched
					// unless the outcome is unambiguous.
					tt := t
					for _, x := range list {
						// We visit all entries for which the script was not
						// defined, including the ones where the region was not
						// defined. This allows for proper disambiguation within
						// regions.
						if x.flags&scriptInFrom == 0 && t.region.contains(regionID(x.region)) {
							tt.region = regionID(x.region)
							tt.setUndefinedScript(scriptID(x.script))
							goodScript = goodScript && tt.script == scriptID(x.script)
							count++
						}
					}
					if count == 1 {
						return tt, nil
					}
					// Even if we fail to find a unique Region, we might have
					// an unambiguous script.
					if goodScript {
						t.script = tt.script
					}
				}
			}
		}
	} else {
		// Search matches for und-script.
		if t.script != 0 {
			x := likelyScript[t.script]
			if x.region != 0 {
				t.setUndefinedRegion(regionID(x.region))
				t.setUndefinedLang(langID(x.lang))
				return t, nil
			}
		}
		// Search matches for und-region. If und-script-region exists, it would
		// have been found earlier.
		if t.region != 0 {
			if i := regionInclusion[t.region]; i < nRegionGroups {
				x := likelyRegionGroup[i]
				if x.region != 0 {
					t.setUndefinedLang(langID(x.lang))
					t.setUndefinedScript(scriptID(x.script))
					t.region = regionID(x.region)
				}
			} else {
				x := likelyRegion[t.region]
				if x.flags&isList != 0 {
					// Use the first entry of the list.
					x = likelyRegionList[x.lang]
				}
				if x.script != 0 && x.flags != scriptInFrom {
					t.setUndefinedLang(langID(x.lang))
					t.setUndefinedScript(scriptID(x.script))
					return t, nil
				}
			}
		}
	}

	// Search matches for lang.
	if t.lang < langNoIndexOffset {
		x := likelyLang[t.lang]
		if x.flags&isList != 0 {
			// Use the first entry of the list.
			x = likelyLangList[x.region]
		}
		if x.region != 0 {
			t.setUndefinedScript(scriptID(x.script))
			t.setUndefinedRegion(regionID(x.region))
		}
		specializeRegion(&t)
		if t.lang == 0 {
			t.lang = _en // default language
		}
		return t, nil
	}
	return t, ErrMissingLikelyTagsData
}
|
||||
|
||||
// setTagsFrom overwrites the language, script and region of t with those of
// id. Other fields of t are left untouched.
func (t *Tag) setTagsFrom(id Tag) {
	t.lang = id.lang
	t.script = id.script
	t.region = id.region
}
|
||||
|
||||
// minimize removes the region or script subtags from t such that
|
||||
// t.addLikelySubtags() == t.minimize().addLikelySubtags().
|
||||
func (t Tag) minimize() (Tag, error) {
|
||||
t, err := minimizeTags(t)
|
||||
if err != nil {
|
||||
return t, err
|
||||
}
|
||||
t.remakeString()
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// minimizeTags mimics the behavior of the ICU 51 C implementation.
|
||||
func minimizeTags(t Tag) (Tag, error) {
|
||||
if t.equalTags(und) {
|
||||
return t, nil
|
||||
}
|
||||
max, err := addTags(t)
|
||||
if err != nil {
|
||||
return t, err
|
||||
}
|
||||
for _, id := range [...]Tag{
|
||||
{lang: t.lang},
|
||||
{lang: t.lang, region: t.region},
|
||||
{lang: t.lang, script: t.script},
|
||||
} {
|
||||
if x, err := addTags(id); err == nil && max.equalTags(x) {
|
||||
t.setTagsFrom(id)
|
||||
break
|
||||
}
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// Tag Matching
|
||||
// CLDR defines an algorithm for finding the best match between two sets of language
|
||||
// tags. The basic algorithm defines how to score a possible match and then find
|
||||
// the match with the best score
|
||||
// (see http://www.unicode.org/reports/tr35/#LanguageMatching).
|
||||
// Using scoring has several disadvantages. The scoring obfuscates the importance of
|
||||
// the various factors considered, making the algorithm harder to understand. Using
|
||||
// scoring also requires the full score to be computed for each pair of tags.
|
||||
//
|
||||
// We will use a different algorithm which aims to have the following properties:
|
||||
// - clarity on the precedence of the various selection factors, and
|
||||
// - improved performance by allowing early termination of a comparison.
|
||||
//
|
||||
// Matching algorithm (overview)
|
||||
// Input:
|
||||
// - supported: a set of supported tags
|
||||
// - default: the default tag to return in case there is no match
|
||||
// - desired: list of desired tags, ordered by preference, starting with
|
||||
// the most-preferred.
|
||||
//
|
||||
// Algorithm:
|
||||
// 1) Set the best match to the lowest confidence level
|
||||
// 2) For each tag in "desired":
|
||||
// a) For each tag in "supported":
|
||||
// 1) compute the match between the two tags.
|
||||
// 2) if the match is better than the previous best match, replace it
|
||||
// with the new match. (see next section)
|
||||
// b) if the current best match is above a certain threshold, return this
|
||||
// match without proceeding to the next tag in "desired". [See Note 1]
|
||||
// 3) If the best match so far is below a certain threshold, return "default".
|
||||
//
|
||||
// Ranking:
|
||||
// We use two phases to determine whether one pair of tags are a better match
|
||||
// than another pair of tags. First, we determine a rough confidence level. If the
|
||||
// levels are different, the one with the highest confidence wins.
|
||||
// Second, if the rough confidence levels are identical, we use a set of tie-breaker
|
||||
// rules.
|
||||
//
|
||||
// The confidence level of matching a pair of tags is determined by finding the
|
||||
// lowest confidence level of any matches of the corresponding subtags (the
|
||||
// result is deemed as good as its weakest link).
|
||||
// We define the following levels:
|
||||
// Exact - An exact match of a subtag, before adding likely subtags.
|
||||
// MaxExact - An exact match of a subtag, after adding likely subtags.
|
||||
// [See Note 2].
|
||||
// High - High level of mutual intelligibility between different subtag
|
||||
// variants.
|
||||
// Low - Low level of mutual intelligibility between different subtag
|
||||
// variants.
|
||||
// No - No mutual intelligibility.
|
||||
//
|
||||
// The following levels can occur for each type of subtag:
|
||||
// Base: Exact, MaxExact, High, Low, No
|
||||
// Script: Exact, MaxExact [see Note 3], Low, No
|
||||
// Region: Exact, MaxExact, High
|
||||
// Variant: Exact, High
|
||||
// Private: Exact, No
|
||||
//
|
||||
// Any result with a confidence level of Low or higher is deemed a possible match.
|
||||
// Once a desired tag matches any of the supported tags with a level of MaxExact
|
||||
// or higher, the next desired tag is not considered (see Step 2.b).
|
||||
// Note that CLDR provides languageMatching data that defines close equivalence
|
||||
// classes for base languages, scripts and regions.
|
||||
//
|
||||
// Tie-breaking
|
||||
// If we get the same confidence level for two matches, we apply a sequence of
|
||||
// tie-breaking rules. The first that succeeds defines the result. The rules are
|
||||
// applied in the following order.
|
||||
// 1) Original language was defined and was identical.
|
||||
// 2) Original region was defined and was identical.
|
||||
// 3) Distance between two maximized regions was the smallest.
|
||||
// 4) Original script was defined and was identical.
|
||||
// 5) Distance from want tag to have tag using the parent relation [see Note 5.]
|
||||
// If there is still no winner after these rules are applied, the first match
|
||||
// found wins.
|
||||
//
|
||||
// Notes:
|
||||
// [1] Note that even if we may not have a perfect match, if a match is above a
|
||||
// certain threshold, it is considered a better match than any other match
|
||||
// to a tag later in the list of preferred language tags.
|
||||
// [2] In practice, as matching of Exact is done in a separate phase from
|
||||
// matching the other levels, we reuse the Exact level to mean MaxExact in
|
||||
// the second phase. As a consequence, we only need the levels defined by
|
||||
// the Confidence type. The MaxExact confidence level is mapped to High in
|
||||
// the public API.
|
||||
// [3] We do not differentiate between maximized script values that were derived
|
||||
// from suppressScript versus most likely tag data. We determined that in
|
||||
// ranking the two, one ranks just after the other. Moreover, the two cannot
|
||||
// occur concurrently. As a consequence, they are identical for practical
|
||||
// purposes.
|
||||
// [4] In case of deprecated, macro-equivalents and legacy mappings, we assign
|
||||
// the MaxExact level to allow iw vs he to still be a closer match than
|
||||
// en-AU vs en-US, for example.
|
||||
// [5] In CLDR a locale inherits fields that are unspecified for this locale
|
||||
// from its parent. Therefore, if a locale is a parent of another locale,
|
||||
// it is a strong measure for closeness, especially when no other tie
|
||||
// breaker rule applies. One could also argue it is inconsistent, for
|
||||
// example, when pt-AO matches pt (which CLDR equates with pt-BR), even
|
||||
// though its parent is pt-PT according to the inheritance rules.
|
||||
//
|
||||
// Implementation Details:
|
||||
// There are several performance considerations worth pointing out. Most notably,
|
||||
// we preprocess as much as possible (within reason) at the time of creation of a
|
||||
// matcher. This includes:
|
||||
// - creating a per-language map, which includes data for the raw base language
|
||||
// and its canonicalized variant (if applicable),
|
||||
// - expanding entries for the equivalence classes defined in CLDR's
|
||||
// languageMatch data.
|
||||
// The per-language map ensures that typically only a very small number of tags
|
||||
// need to be considered. The pre-expansion of canonicalized subtags and
|
||||
// equivalence classes reduces the amount of map lookups that need to be done at
|
||||
// runtime.
|
||||
|
||||
// matcher keeps a set of supported language tags, indexed by language.
type matcher struct {
	// default_ is the tag returned by Match when no match is found.
	default_ *haveTag
	// index maps a language to the supported tags that can match it.
	index map[langID]*matchHeader
	// NOTE(review): passSettings is not read or written in the code visible
	// here — confirm whether it is still used.
	passSettings bool
}
|
||||
|
||||
// matchHeader has the lists of tags for exact matches and matches based on
|
||||
// maximized and canonicalized tags for a given language.
|
||||
type matchHeader struct {
|
||||
exact []*haveTag
|
||||
max []*haveTag
|
||||
}
|
||||
|
||||
// haveTag holds a supported Tag and its maximized script and region. The maximized
|
||||
// or canonicalized language is not stored as it is not needed during matching.
|
||||
type haveTag struct {
|
||||
tag Tag
|
||||
|
||||
// index of this tag in the original list of supported tags.
|
||||
index int
|
||||
|
||||
// conf is the maximum confidence that can result from matching this haveTag.
|
||||
// When conf < Exact this means it was inserted after applying a CLDR equivalence rule.
|
||||
conf Confidence
|
||||
|
||||
// Maximized region and script.
|
||||
maxRegion regionID
|
||||
maxScript scriptID
|
||||
|
||||
// altScript may be checked as an alternative match to maxScript. If altScript
|
||||
// matches, the confidence level for this match is Low. Theoretically there
|
||||
// could be multiple alternative scripts. This does not occur in practice.
|
||||
altScript scriptID
|
||||
|
||||
// nextMax is the index of the next haveTag with the same maximized tags.
|
||||
nextMax uint16
|
||||
}
|
||||
|
||||
func makeHaveTag(tag Tag, index int) (haveTag, langID) {
|
||||
max := tag
|
||||
if tag.lang != 0 {
|
||||
max, _ = max.canonicalize(All)
|
||||
max, _ = addTags(max)
|
||||
max.remakeString()
|
||||
}
|
||||
return haveTag{tag, index, Exact, max.region, max.script, altScript(max.lang, max.script), 0}, max.lang
|
||||
}
|
||||
|
||||
// altScript returns an alternative script that may match the given script with
|
||||
// a low confidence. At the moment, the langMatch data allows for at most one
|
||||
// script to map to another and we rely on this to keep the code simple.
|
||||
func altScript(l langID, s scriptID) scriptID {
|
||||
for _, alt := range matchScript {
|
||||
if (alt.lang == 0 || langID(alt.lang) == l) && scriptID(alt.have) == s {
|
||||
return scriptID(alt.want)
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// addIfNew adds a haveTag to the list of tags only if it is a unique tag.
|
||||
// Tags that have the same maximized values are linked by index.
|
||||
func (h *matchHeader) addIfNew(n haveTag, exact bool) {
|
||||
// Don't add new exact matches.
|
||||
for _, v := range h.exact {
|
||||
if v.tag.equalsRest(n.tag) {
|
||||
return
|
||||
}
|
||||
}
|
||||
if exact {
|
||||
h.exact = append(h.exact, &n)
|
||||
}
|
||||
// Allow duplicate maximized tags, but create a linked list to allow quickly
|
||||
// comparing the equivalents and bail out.
|
||||
for i, v := range h.max {
|
||||
if v.maxScript == n.maxScript &&
|
||||
v.maxRegion == n.maxRegion &&
|
||||
v.tag.variantOrPrivateTagStr() == n.tag.variantOrPrivateTagStr() {
|
||||
for h.max[i].nextMax != 0 {
|
||||
i = int(h.max[i].nextMax)
|
||||
}
|
||||
h.max[i].nextMax = uint16(len(h.max))
|
||||
break
|
||||
}
|
||||
}
|
||||
h.max = append(h.max, &n)
|
||||
}
|
||||
|
||||
// header returns the matchHeader for the given language. It creates one if
|
||||
// it doesn't already exist.
|
||||
func (m *matcher) header(l langID) *matchHeader {
|
||||
if h := m.index[l]; h != nil {
|
||||
return h
|
||||
}
|
||||
h := &matchHeader{}
|
||||
m.index[l] = h
|
||||
return h
|
||||
}
|
||||
|
||||
// newMatcher builds an index for the given supported tags and returns it as
// a matcher. It also expands the index by considering various equivalence classes
// for a given tag. With an empty list of supported tags, the returned matcher
// has an empty index and a zero haveTag as its default.
func newMatcher(supported []Tag) *matcher {
	m := &matcher{
		index: make(map[langID]*matchHeader),
	}
	if len(supported) == 0 {
		m.default_ = &haveTag{}
		return m
	}
	// Add supported languages to the index. Add exact matches first to give
	// them precedence.
	for i, tag := range supported {
		pair, _ := makeHaveTag(tag, i)
		m.header(tag.lang).addIfNew(pair, true)
	}
	// The first supported tag is the default.
	m.default_ = m.header(supported[0].lang).exact[0]
	// Second pass: also index each tag under its maximized language when that
	// differs from the language it was given with.
	for i, tag := range supported {
		pair, max := makeHaveTag(tag, i)
		if max != tag.lang {
			m.header(max).addIfNew(pair, false)
		}
	}

	// update is used to add indexes in the map for equivalent languages.
	// If force is true, the update will also apply to derived entries. To
	// avoid applying a "transitive closure", use false.
	update := func(want, have uint16, conf Confidence, force bool) {
		if hh := m.index[langID(have)]; hh != nil {
			if !force && len(hh.exact) == 0 {
				return
			}
			hw := m.header(langID(want))
			for _, ht := range hh.max {
				// Work on a copy so the entry indexed under have is untouched.
				v := *ht
				if conf < v.conf {
					v.conf = conf
				}
				v.nextMax = 0 // this value needs to be recomputed
				if v.altScript != 0 {
					v.altScript = altScript(langID(want), v.maxScript)
				}
				hw.addIfNew(v, conf == Exact && len(hh.exact) > 0)
			}
		}
	}

	// Add entries for languages with mutual intelligibility as defined by CLDR's
	// languageMatch data.
	for _, ml := range matchLang {
		update(ml.want, ml.have, Confidence(ml.conf), false)
		if !ml.oneway {
			update(ml.have, ml.want, Confidence(ml.conf), false)
		}
	}

	// Add entries for possible canonicalizations. This is an optimization to
	// ensure that only one map lookup needs to be done at runtime per desired tag.
	// First we match deprecated equivalents. If they are perfect equivalents
	// (their canonicalization simply substitutes a different language code, but
	// nothing else), the match confidence is Exact, otherwise it is High.
	for i, lm := range langAliasMap {
		if lm.from == _sh {
			continue
		}

		// If deprecated codes match and there is no fiddling with the script
		// or region, we consider it an exact match.
		conf := Exact
		if langAliasTypes[i] != langMacro {
			if !isExactEquivalent(langID(lm.from)) {
				conf = High
			}
			update(lm.to, lm.from, conf, true)
		}
		update(lm.from, lm.to, conf, true)
	}
	return m
}
|
||||
|
||||
// getBest gets the best matching tag in m for any of the given tags, taking into
|
||||
// account the order of preference of the given tags.
|
||||
func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
|
||||
best := bestMatch{}
|
||||
for _, w := range want {
|
||||
var max Tag
|
||||
// Check for exact match first.
|
||||
h := m.index[w.lang]
|
||||
if w.lang != 0 {
|
||||
// Base language is defined.
|
||||
if h == nil {
|
||||
continue
|
||||
}
|
||||
for i := range h.exact {
|
||||
have := h.exact[i]
|
||||
if have.tag.equalsRest(w) {
|
||||
return have, w, Exact
|
||||
}
|
||||
}
|
||||
max, _ = w.canonicalize(Legacy | Deprecated)
|
||||
max, _ = addTags(max)
|
||||
} else {
|
||||
// Base language is not defined.
|
||||
if h != nil {
|
||||
for i := range h.exact {
|
||||
have := h.exact[i]
|
||||
if have.tag.equalsRest(w) {
|
||||
return have, w, Exact
|
||||
}
|
||||
}
|
||||
}
|
||||
if w.script == 0 && w.region == 0 {
|
||||
// We skip all tags matching und for approximate matching, including
|
||||
// private tags.
|
||||
continue
|
||||
}
|
||||
max, _ = addTags(w)
|
||||
if h = m.index[max.lang]; h == nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
// Check for match based on maximized tag.
|
||||
for i := range h.max {
|
||||
have := h.max[i]
|
||||
best.update(have, w, max.script, max.region)
|
||||
if best.conf == Exact {
|
||||
for have.nextMax != 0 {
|
||||
have = h.max[have.nextMax]
|
||||
best.update(have, w, max.script, max.region)
|
||||
}
|
||||
return best.have, best.want, High
|
||||
}
|
||||
}
|
||||
}
|
||||
if best.conf <= No {
|
||||
if len(want) != 0 {
|
||||
return nil, want[0], No
|
||||
}
|
||||
return nil, Tag{}, No
|
||||
}
|
||||
return best.have, best.want, best.conf
|
||||
}
|
||||
|
||||
// bestMatch accumulates the best match so far.
|
||||
type bestMatch struct {
|
||||
have *haveTag
|
||||
want Tag
|
||||
conf Confidence
|
||||
// Cached results from applying tie-breaking rules.
|
||||
origLang bool
|
||||
origReg bool
|
||||
regDist uint8
|
||||
origScript bool
|
||||
parentDist uint8 // 255 if have is not an ancestor of want tag.
|
||||
}
|
||||
|
||||
// update updates the existing best match if the new pair is considered to be a
|
||||
// better match.
|
||||
// To determine if the given pair is a better match, it first computes the rough
|
||||
// confidence level. If this surpasses the current match, it will replace it and
|
||||
// update the tie-breaker rule cache. If there is a tie, it proceeds with applying
|
||||
// a series of tie-breaker rules. If there is no conclusive winner after applying
|
||||
// the tie-breaker rules, it leaves the current match as the preferred match.
|
||||
func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion regionID) {
|
||||
// Bail if the maximum attainable confidence is below that of the current best match.
|
||||
c := have.conf
|
||||
if c < m.conf {
|
||||
return
|
||||
}
|
||||
if have.maxScript != maxScript {
|
||||
// There is usually very little comprehension between different scripts.
|
||||
// In a few cases there may still be Low comprehension. This possibility is
|
||||
// pre-computed and stored in have.altScript.
|
||||
if Low < m.conf || have.altScript != maxScript {
|
||||
return
|
||||
}
|
||||
c = Low
|
||||
} else if have.maxRegion != maxRegion {
|
||||
// There is usually a small difference between languages across regions.
|
||||
// We use the region distance (below) to disambiguate between equal matches.
|
||||
if High < c {
|
||||
c = High
|
||||
}
|
||||
}
|
||||
|
||||
// We store the results of the computations of the tie-breaker rules along
|
||||
// with the best match. There is no need to do the checks once we determine
|
||||
// we have a winner, but we do still need to do the tie-breaker computations.
|
||||
// We use "beaten" to keep track if we still need to do the checks.
|
||||
beaten := false // true if the new pair defeats the current one.
|
||||
if c != m.conf {
|
||||
if c < m.conf {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// Tie-breaker rules:
|
||||
// We prefer if the pre-maximized language was specified and identical.
|
||||
origLang := have.tag.lang == tag.lang && tag.lang != 0
|
||||
if !beaten && m.origLang != origLang {
|
||||
if m.origLang {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// We prefer if the pre-maximized region was specified and identical.
|
||||
origReg := have.tag.region == tag.region && tag.region != 0
|
||||
if !beaten && m.origReg != origReg {
|
||||
if m.origReg {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// Next we prefer smaller distances between regions, as defined by regionDist.
|
||||
regDist := regionDist(have.maxRegion, maxRegion, tag.lang)
|
||||
if !beaten && m.regDist != regDist {
|
||||
if regDist > m.regDist {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// Next we prefer if the pre-maximized script was specified and identical.
|
||||
origScript := have.tag.script == tag.script && tag.script != 0
|
||||
if !beaten && m.origScript != origScript {
|
||||
if m.origScript {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// Finally we prefer tags which have a closer parent relationship.
|
||||
parentDist := parentDistance(have.tag.region, tag)
|
||||
if !beaten && m.parentDist != parentDist {
|
||||
if parentDist > m.parentDist {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// Update m to the newly found best match.
|
||||
if beaten {
|
||||
m.have = have
|
||||
m.want = tag
|
||||
m.conf = c
|
||||
m.origLang = origLang
|
||||
m.origReg = origReg
|
||||
m.origScript = origScript
|
||||
m.regDist = regDist
|
||||
m.parentDist = parentDist
|
||||
}
|
||||
}
|
||||
|
||||
// parentDistance returns the number of times Parent must be called before the
|
||||
// regions match. It is assumed that it has already been checked that lang and
|
||||
// script are identical. If haveRegion does not occur in the ancestor chain of
|
||||
// tag, it returns 255.
|
||||
func parentDistance(haveRegion regionID, tag Tag) uint8 {
|
||||
p := tag.Parent()
|
||||
d := uint8(1)
|
||||
for haveRegion != p.region {
|
||||
if p.region == 0 {
|
||||
return 255
|
||||
}
|
||||
p = p.Parent()
|
||||
d++
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// regionDist wraps regionDistance with some exceptions to the algorithmic distance.
|
||||
func regionDist(a, b regionID, lang langID) uint8 {
|
||||
if lang == _en {
|
||||
// Two variants of non-US English are close to each other, regardless of distance.
|
||||
if a != _US && b != _US {
|
||||
return 2
|
||||
}
|
||||
}
|
||||
return uint8(regionDistance(a, b))
|
||||
}
|
||||
|
||||
// regionDistance computes the distance between two regions based on the
|
||||
// distance in the graph of region containments as defined in CLDR. It iterates
|
||||
// over increasingly inclusive sets of groups, represented as bit vectors, until
|
||||
// the source bit vector has bits in common with the destination vector.
|
||||
func regionDistance(a, b regionID) int {
|
||||
if a == b {
|
||||
return 0
|
||||
}
|
||||
p, q := regionInclusion[a], regionInclusion[b]
|
||||
if p < nRegionGroups {
|
||||
p, q = q, p
|
||||
}
|
||||
set := regionInclusionBits
|
||||
if q < nRegionGroups && set[p]&(1<<q) != 0 {
|
||||
return 1
|
||||
}
|
||||
d := 2
|
||||
for goal := set[q]; set[p]&goal == 0; p = regionInclusionNext[p] {
|
||||
d++
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func (t Tag) variants() string {
|
||||
if t.pVariant == 0 {
|
||||
return ""
|
||||
}
|
||||
return t.str[t.pVariant:t.pExt]
|
||||
}
|
||||
|
||||
// variantOrPrivateTagStr returns variants or private use tags.
|
||||
func (t Tag) variantOrPrivateTagStr() string {
|
||||
if t.pExt > 0 {
|
||||
return t.str[t.pVariant:t.pExt]
|
||||
}
|
||||
return t.str[t.pVariant:]
|
||||
}
|
||||
|
||||
// equalsRest compares everything except the language.
|
||||
func (a Tag) equalsRest(b Tag) bool {
|
||||
// TODO: don't include extensions in this comparison. To do this efficiently,
|
||||
// though, we should handle private tags separately.
|
||||
return a.script == b.script && a.region == b.region && a.variantOrPrivateTagStr() == b.variantOrPrivateTagStr()
|
||||
}
|
||||
|
||||
// isExactEquivalent returns true if canonicalizing the language will not alter
|
||||
// the script or region of a tag.
|
||||
func isExactEquivalent(l langID) bool {
|
||||
for _, o := range notEquivalent {
|
||||
if o == l {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
var notEquivalent []langID
|
||||
|
||||
func init() {
|
||||
// Create a list of all languages for which canonicalization may alter the
|
||||
// script or region.
|
||||
for _, lm := range langAliasMap {
|
||||
tag := Tag{lang: langID(lm.from)}
|
||||
if tag, _ = tag.canonicalize(All); tag.script != 0 || tag.region != 0 {
|
||||
notEquivalent = append(notEquivalent, langID(lm.from))
|
||||
}
|
||||
}
|
||||
}
|
859
vendor/golang.org/x/text/language/parse.go
generated
vendored
859
vendor/golang.org/x/text/language/parse.go
generated
vendored
|
@ -1,859 +0,0 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/tag"
|
||||
)
|
||||
|
||||
// isAlpha reports whether b is a letter rather than a digit.
// b must be an ASCII letter or digit; the check only tests that b sorts
// after '9'.
func isAlpha(b byte) bool {
	const lastDigit = '9'
	return lastDigit < b
}
|
||||
|
||||
// isAlphaNum reports whether s consists solely of ASCII letters and digits.
// An empty s yields true.
func isAlphaNum(s []byte) bool {
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case 'a' <= c && c <= 'z':
		case 'A' <= c && c <= 'Z':
		case '0' <= c && c <= '9':
		default:
			return false
		}
	}
	return true
}
|
||||
|
||||
// errSyntax is returned by any of the parsing functions when the
|
||||
// input is not well-formed, according to BCP 47.
|
||||
// TODO: return the position at which the syntax error occurred?
|
||||
var errSyntax = errors.New("language: tag is not well-formed")
|
||||
|
||||
// ValueError is returned by any of the parsing functions when the input is
// well-formed but the respective subtag is not recognized as a valid value.
type ValueError struct {
	v [8]byte // the offending subtag, zero-padded
}

// mkErrInvalid returns a ValueError holding up to the first 8 bytes of s.
func mkErrInvalid(s []byte) error {
	e := ValueError{}
	copy(e.v[:], s)
	return e
}

// tag returns the stored subtag up to, but not including, the first zero
// byte, or all 8 bytes if there is none.
func (e ValueError) tag() []byte {
	if n := bytes.IndexByte(e.v[:], 0); n >= 0 {
		return e.v[:n]
	}
	return e.v[:]
}

// Error implements the error interface.
func (e ValueError) Error() string {
	return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
}

// Subtag returns the subtag for which the error occurred.
func (e ValueError) Subtag() string {
	subtag := e.tag()
	return string(subtag)
}
|
||||
|
||||
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
|
||||
type scanner struct {
|
||||
b []byte
|
||||
bytes [max99thPercentileSize]byte
|
||||
token []byte
|
||||
start int // start position of the current token
|
||||
end int // end position of the current token
|
||||
next int // next point for scan
|
||||
err error
|
||||
done bool
|
||||
}
|
||||
|
||||
func makeScannerString(s string) scanner {
|
||||
scan := scanner{}
|
||||
if len(s) <= len(scan.bytes) {
|
||||
scan.b = scan.bytes[:copy(scan.bytes[:], s)]
|
||||
} else {
|
||||
scan.b = []byte(s)
|
||||
}
|
||||
scan.init()
|
||||
return scan
|
||||
}
|
||||
|
||||
// makeScanner returns a scanner using b as the input buffer.
|
||||
// b is not copied and may be modified by the scanner routines.
|
||||
func makeScanner(b []byte) scanner {
|
||||
scan := scanner{b: b}
|
||||
scan.init()
|
||||
return scan
|
||||
}
|
||||
|
||||
func (s *scanner) init() {
|
||||
for i, c := range s.b {
|
||||
if c == '_' {
|
||||
s.b[i] = '-'
|
||||
}
|
||||
}
|
||||
s.scan()
|
||||
}
|
||||
|
||||
// restToLower converts the string between start and end to lower case.
|
||||
func (s *scanner) toLower(start, end int) {
|
||||
for i := start; i < end; i++ {
|
||||
c := s.b[i]
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
s.b[i] += 'a' - 'A'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *scanner) setError(e error) {
|
||||
if s.err == nil || (e == errSyntax && s.err != errSyntax) {
|
||||
s.err = e
|
||||
}
|
||||
}
|
||||
|
||||
// resizeRange shrinks or grows the array at position oldStart such that
|
||||
// a new string of size newSize can fit between oldStart and oldEnd.
|
||||
// Sets the scan point to after the resized range.
|
||||
func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
|
||||
s.start = oldStart
|
||||
if end := oldStart + newSize; end != oldEnd {
|
||||
diff := end - oldEnd
|
||||
if end < cap(s.b) {
|
||||
b := make([]byte, len(s.b)+diff)
|
||||
copy(b, s.b[:oldStart])
|
||||
copy(b[end:], s.b[oldEnd:])
|
||||
s.b = b
|
||||
} else {
|
||||
s.b = append(s.b[end:], s.b[oldEnd:]...)
|
||||
}
|
||||
s.next = end + (s.next - s.end)
|
||||
s.end = end
|
||||
}
|
||||
}
|
||||
|
||||
// replace replaces the current token with repl.
|
||||
func (s *scanner) replace(repl string) {
|
||||
s.resizeRange(s.start, s.end, len(repl))
|
||||
copy(s.b[s.start:], repl)
|
||||
}
|
||||
|
||||
// gobble removes the current token from the input.
|
||||
// Caller must call scan after calling gobble.
|
||||
func (s *scanner) gobble(e error) {
|
||||
s.setError(e)
|
||||
if s.start == 0 {
|
||||
s.b = s.b[:+copy(s.b, s.b[s.next:])]
|
||||
s.end = 0
|
||||
} else {
|
||||
s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
|
||||
s.end = s.start - 1
|
||||
}
|
||||
s.next = s.start
|
||||
}
|
||||
|
||||
// deleteRange removes the given range from s.b before the current token.
|
||||
func (s *scanner) deleteRange(start, end int) {
|
||||
s.setError(errSyntax)
|
||||
s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
|
||||
diff := end - start
|
||||
s.next -= diff
|
||||
s.start -= diff
|
||||
s.end -= diff
|
||||
}
|
||||
|
||||
// scan parses the next token of a BCP 47 string. Tokens that are larger
|
||||
// than 8 characters or include non-alphanumeric characters result in an error
|
||||
// and are gobbled and removed from the output.
|
||||
// It returns the end position of the last token consumed.
|
||||
func (s *scanner) scan() (end int) {
|
||||
end = s.end
|
||||
s.token = nil
|
||||
for s.start = s.next; s.next < len(s.b); {
|
||||
i := bytes.IndexByte(s.b[s.next:], '-')
|
||||
if i == -1 {
|
||||
s.end = len(s.b)
|
||||
s.next = len(s.b)
|
||||
i = s.end - s.start
|
||||
} else {
|
||||
s.end = s.next + i
|
||||
s.next = s.end + 1
|
||||
}
|
||||
token := s.b[s.start:s.end]
|
||||
if i < 1 || i > 8 || !isAlphaNum(token) {
|
||||
s.gobble(errSyntax)
|
||||
continue
|
||||
}
|
||||
s.token = token
|
||||
return end
|
||||
}
|
||||
if n := len(s.b); n > 0 && s.b[n-1] == '-' {
|
||||
s.setError(errSyntax)
|
||||
s.b = s.b[:len(s.b)-1]
|
||||
}
|
||||
s.done = true
|
||||
return end
|
||||
}
|
||||
|
||||
// acceptMinSize parses multiple tokens of the given size or greater.
|
||||
// It returns the end position of the last token consumed.
|
||||
func (s *scanner) acceptMinSize(min int) (end int) {
|
||||
end = s.end
|
||||
s.scan()
|
||||
for ; len(s.token) >= min; s.scan() {
|
||||
end = s.end
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
||||
// failed it returns an error and any part of the tag that could be parsed.
|
||||
// If parsing succeeded but an unknown value was found, it returns
|
||||
// ValueError. The Tag returned in this case is just stripped of the unknown
|
||||
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
||||
// and extensions to this standard defined in
|
||||
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// The resulting tag is canonicalized using the default canonicalization type.
|
||||
func Parse(s string) (t Tag, err error) {
|
||||
return Default.Parse(s)
|
||||
}
|
||||
|
||||
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
||||
// failed it returns an error and any part of the tag that could be parsed.
|
||||
// If parsing succeeded but an unknown value was found, it returns
|
||||
// ValueError. The Tag returned in this case is just stripped of the unknown
|
||||
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
||||
// and extensions to this standard defined in
|
||||
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// The resulting tag is canonicalized using the the canonicalization type c.
|
||||
func (c CanonType) Parse(s string) (t Tag, err error) {
|
||||
// TODO: consider supporting old-style locale key-value pairs.
|
||||
if s == "" {
|
||||
return und, errSyntax
|
||||
}
|
||||
if len(s) <= maxAltTaglen {
|
||||
b := [maxAltTaglen]byte{}
|
||||
for i, c := range s {
|
||||
// Generating invalid UTF-8 is okay as it won't match.
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
c += 'a' - 'A'
|
||||
} else if c == '_' {
|
||||
c = '-'
|
||||
}
|
||||
b[i] = byte(c)
|
||||
}
|
||||
if t, ok := grandfathered(b); ok {
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
scan := makeScannerString(s)
|
||||
t, err = parse(&scan, s)
|
||||
t, changed := t.canonicalize(c)
|
||||
if changed {
|
||||
t.remakeString()
|
||||
}
|
||||
return t, err
|
||||
}
|
||||
|
||||
func parse(scan *scanner, s string) (t Tag, err error) {
|
||||
t = und
|
||||
var end int
|
||||
if n := len(scan.token); n <= 1 {
|
||||
scan.toLower(0, len(scan.b))
|
||||
if n == 0 || scan.token[0] != 'x' {
|
||||
return t, errSyntax
|
||||
}
|
||||
end = parseExtensions(scan)
|
||||
} else if n >= 4 {
|
||||
return und, errSyntax
|
||||
} else { // the usual case
|
||||
t, end = parseTag(scan)
|
||||
if n := len(scan.token); n == 1 {
|
||||
t.pExt = uint16(end)
|
||||
end = parseExtensions(scan)
|
||||
} else if end < len(scan.b) {
|
||||
scan.setError(errSyntax)
|
||||
scan.b = scan.b[:end]
|
||||
}
|
||||
}
|
||||
if int(t.pVariant) < len(scan.b) {
|
||||
if end < len(s) {
|
||||
s = s[:end]
|
||||
}
|
||||
if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
|
||||
t.str = s
|
||||
} else {
|
||||
t.str = string(scan.b)
|
||||
}
|
||||
} else {
|
||||
t.pVariant, t.pExt = 0, 0
|
||||
}
|
||||
return t, scan.err
|
||||
}
|
||||
|
||||
// parseTag parses language, script, region and variants.
|
||||
// It returns a Tag and the end position in the input that was parsed.
|
||||
func parseTag(scan *scanner) (t Tag, end int) {
|
||||
var e error
|
||||
// TODO: set an error if an unknown lang, script or region is encountered.
|
||||
t.lang, e = getLangID(scan.token)
|
||||
scan.setError(e)
|
||||
scan.replace(t.lang.String())
|
||||
langStart := scan.start
|
||||
end = scan.scan()
|
||||
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
|
||||
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
|
||||
// to a tag of the form <extlang>.
|
||||
lang, e := getLangID(scan.token)
|
||||
if lang != 0 {
|
||||
t.lang = lang
|
||||
copy(scan.b[langStart:], lang.String())
|
||||
scan.b[langStart+3] = '-'
|
||||
scan.start = langStart + 4
|
||||
}
|
||||
scan.gobble(e)
|
||||
end = scan.scan()
|
||||
}
|
||||
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
|
||||
t.script, e = getScriptID(script, scan.token)
|
||||
if t.script == 0 {
|
||||
scan.gobble(e)
|
||||
}
|
||||
end = scan.scan()
|
||||
}
|
||||
if n := len(scan.token); n >= 2 && n <= 3 {
|
||||
t.region, e = getRegionID(scan.token)
|
||||
if t.region == 0 {
|
||||
scan.gobble(e)
|
||||
} else {
|
||||
scan.replace(t.region.String())
|
||||
}
|
||||
end = scan.scan()
|
||||
}
|
||||
scan.toLower(scan.start, len(scan.b))
|
||||
t.pVariant = byte(end)
|
||||
end = parseVariants(scan, end, t)
|
||||
t.pExt = uint16(end)
|
||||
return t, end
|
||||
}
|
||||
|
||||
var separator = []byte{'-'}
|
||||
|
||||
// parseVariants scans tokens as long as each token is a valid variant string.
|
||||
// Duplicate variants are removed.
|
||||
func parseVariants(scan *scanner, end int, t Tag) int {
|
||||
start := scan.start
|
||||
varIDBuf := [4]uint8{}
|
||||
variantBuf := [4][]byte{}
|
||||
varID := varIDBuf[:0]
|
||||
variant := variantBuf[:0]
|
||||
last := -1
|
||||
needSort := false
|
||||
for ; len(scan.token) >= 4; scan.scan() {
|
||||
// TODO: measure the impact of needing this conversion and redesign
|
||||
// the data structure if there is an issue.
|
||||
v, ok := variantIndex[string(scan.token)]
|
||||
if !ok {
|
||||
// unknown variant
|
||||
// TODO: allow user-defined variants?
|
||||
scan.gobble(mkErrInvalid(scan.token))
|
||||
continue
|
||||
}
|
||||
varID = append(varID, v)
|
||||
variant = append(variant, scan.token)
|
||||
if !needSort {
|
||||
if last < int(v) {
|
||||
last = int(v)
|
||||
} else {
|
||||
needSort = true
|
||||
// There is no legal combinations of more than 7 variants
|
||||
// (and this is by no means a useful sequence).
|
||||
const maxVariants = 8
|
||||
if len(varID) > maxVariants {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
end = scan.end
|
||||
}
|
||||
if needSort {
|
||||
sort.Sort(variantsSort{varID, variant})
|
||||
k, l := 0, -1
|
||||
for i, v := range varID {
|
||||
w := int(v)
|
||||
if l == w {
|
||||
// Remove duplicates.
|
||||
continue
|
||||
}
|
||||
varID[k] = varID[i]
|
||||
variant[k] = variant[i]
|
||||
k++
|
||||
l = w
|
||||
}
|
||||
if str := bytes.Join(variant[:k], separator); len(str) == 0 {
|
||||
end = start - 1
|
||||
} else {
|
||||
scan.resizeRange(start, end, len(str))
|
||||
copy(scan.b[scan.start:], str)
|
||||
end = scan.end
|
||||
}
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// variantsSort sorts variant strings by their index. i and v are parallel
// slices: v[k] is the variant whose index is i[k]. It implements
// sort.Interface.
type variantsSort struct {
	i []uint8
	v [][]byte
}

// Len returns the number of variants.
func (s variantsSort) Len() int {
	return len(s.i)
}

// Swap exchanges elements i and j in both parallel slices.
func (s variantsSort) Swap(i, j int) {
	s.v[i], s.v[j] = s.v[j], s.v[i]
	s.i[i], s.i[j] = s.i[j], s.i[i]
}

// Less orders variants by ascending index.
func (s variantsSort) Less(i, j int) bool {
	return s.i[j] > s.i[i]
}
|
||||
|
||||
// bytesSort sorts byte slices in lexicographic byte order. It implements
// sort.Interface.
type bytesSort [][]byte

// Len returns the number of slices.
func (b bytesSort) Len() int {
	return len(b)
}

// Swap exchanges elements i and j.
func (b bytesSort) Swap(i, j int) {
	b[j], b[i] = b[i], b[j]
}

// Less reports whether element i sorts strictly before element j.
func (b bytesSort) Less(i, j int) bool {
	return bytes.Compare(b[i], b[j]) < 0
}
|
||||
|
||||
// parseExtensions parses and normalizes the extensions in the buffer.
|
||||
// It returns the last position of scan.b that is part of any extension.
|
||||
// It also trims scan.b to remove excess parts accordingly.
|
||||
func parseExtensions(scan *scanner) int {
|
||||
start := scan.start
|
||||
exts := [][]byte{}
|
||||
private := []byte{}
|
||||
end := scan.end
|
||||
for len(scan.token) == 1 {
|
||||
extStart := scan.start
|
||||
ext := scan.token[0]
|
||||
end = parseExtension(scan)
|
||||
extension := scan.b[extStart:end]
|
||||
if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
|
||||
scan.setError(errSyntax)
|
||||
end = extStart
|
||||
continue
|
||||
} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
|
||||
scan.b = scan.b[:end]
|
||||
return end
|
||||
} else if ext == 'x' {
|
||||
private = extension
|
||||
break
|
||||
}
|
||||
exts = append(exts, extension)
|
||||
}
|
||||
sort.Sort(bytesSort(exts))
|
||||
if len(private) > 0 {
|
||||
exts = append(exts, private)
|
||||
}
|
||||
scan.b = scan.b[:start]
|
||||
if len(exts) > 0 {
|
||||
scan.b = append(scan.b, bytes.Join(exts, separator)...)
|
||||
} else if start > 0 {
|
||||
// Strip trailing '-'.
|
||||
scan.b = scan.b[:start-1]
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// parseExtension parses a single extension and returns the position of
|
||||
// the extension end.
|
||||
func parseExtension(scan *scanner) int {
|
||||
start, end := scan.start, scan.end
|
||||
switch scan.token[0] {
|
||||
case 'u':
|
||||
attrStart := end
|
||||
scan.scan()
|
||||
for last := []byte{}; len(scan.token) > 2; scan.scan() {
|
||||
if bytes.Compare(scan.token, last) != -1 {
|
||||
// Attributes are unsorted. Start over from scratch.
|
||||
p := attrStart + 1
|
||||
scan.next = p
|
||||
attrs := [][]byte{}
|
||||
for scan.scan(); len(scan.token) > 2; scan.scan() {
|
||||
attrs = append(attrs, scan.token)
|
||||
end = scan.end
|
||||
}
|
||||
sort.Sort(bytesSort(attrs))
|
||||
copy(scan.b[p:], bytes.Join(attrs, separator))
|
||||
break
|
||||
}
|
||||
last = scan.token
|
||||
end = scan.end
|
||||
}
|
||||
var last, key []byte
|
||||
for attrEnd := end; len(scan.token) == 2; last = key {
|
||||
key = scan.token
|
||||
keyEnd := scan.end
|
||||
end = scan.acceptMinSize(3)
|
||||
// TODO: check key value validity
|
||||
if keyEnd == end || bytes.Compare(key, last) != 1 {
|
||||
// We have an invalid key or the keys are not sorted.
|
||||
// Start scanning keys from scratch and reorder.
|
||||
p := attrEnd + 1
|
||||
scan.next = p
|
||||
keys := [][]byte{}
|
||||
for scan.scan(); len(scan.token) == 2; {
|
||||
keyStart, keyEnd := scan.start, scan.end
|
||||
end = scan.acceptMinSize(3)
|
||||
if keyEnd != end {
|
||||
keys = append(keys, scan.b[keyStart:end])
|
||||
} else {
|
||||
scan.setError(errSyntax)
|
||||
end = keyStart
|
||||
}
|
||||
}
|
||||
sort.Sort(bytesSort(keys))
|
||||
reordered := bytes.Join(keys, separator)
|
||||
if e := p + len(reordered); e < end {
|
||||
scan.deleteRange(e, end)
|
||||
end = e
|
||||
}
|
||||
copy(scan.b[p:], bytes.Join(keys, separator))
|
||||
break
|
||||
}
|
||||
}
|
||||
case 't':
|
||||
scan.scan()
|
||||
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
|
||||
_, end = parseTag(scan)
|
||||
scan.toLower(start, end)
|
||||
}
|
||||
for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
|
||||
end = scan.acceptMinSize(3)
|
||||
}
|
||||
case 'x':
|
||||
end = scan.acceptMinSize(1)
|
||||
default:
|
||||
end = scan.acceptMinSize(2)
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
|
||||
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
|
||||
// Base, Script or Region or slice of type Variant or Extension is passed more
|
||||
// than once, the latter will overwrite the former. Variants and Extensions are
|
||||
// accumulated, but if two extensions of the same type are passed, the latter
|
||||
// will replace the former. A Tag overwrites all former values and typically
|
||||
// only makes sense as the first argument. The resulting tag is returned after
|
||||
// canonicalizing using the Default CanonType. If one or more errors are
|
||||
// encountered, one of the errors is returned.
|
||||
func Compose(part ...interface{}) (t Tag, err error) {
|
||||
return Default.Compose(part...)
|
||||
}
|
||||
|
||||
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
|
||||
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
|
||||
// Base, Script or Region or slice of type Variant or Extension is passed more
|
||||
// than once, the latter will overwrite the former. Variants and Extensions are
|
||||
// accumulated, but if two extensions of the same type are passed, the latter
|
||||
// will replace the former. A Tag overwrites all former values and typically
|
||||
// only makes sense as the first argument. The resulting tag is returned after
|
||||
// canonicalizing using CanonType c. If one or more errors are encountered,
|
||||
// one of the errors is returned.
|
||||
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
|
||||
var b builder
|
||||
if err = b.update(part...); err != nil {
|
||||
return und, err
|
||||
}
|
||||
t, _ = b.tag.canonicalize(c)
|
||||
|
||||
if len(b.ext) > 0 || len(b.variant) > 0 {
|
||||
sort.Sort(sortVariant(b.variant))
|
||||
sort.Strings(b.ext)
|
||||
if b.private != "" {
|
||||
b.ext = append(b.ext, b.private)
|
||||
}
|
||||
n := maxCoreSize + tokenLen(b.variant...) + tokenLen(b.ext...)
|
||||
buf := make([]byte, n)
|
||||
p := t.genCoreBytes(buf)
|
||||
t.pVariant = byte(p)
|
||||
p += appendTokens(buf[p:], b.variant...)
|
||||
t.pExt = uint16(p)
|
||||
p += appendTokens(buf[p:], b.ext...)
|
||||
t.str = string(buf[:p])
|
||||
} else if b.private != "" {
|
||||
t.str = b.private
|
||||
t.remakeString()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
type builder struct {
|
||||
tag Tag
|
||||
|
||||
private string // the x extension
|
||||
ext []string
|
||||
variant []string
|
||||
|
||||
err error
|
||||
}
|
||||
|
||||
func (b *builder) addExt(e string) {
|
||||
if e == "" {
|
||||
} else if e[0] == 'x' {
|
||||
b.private = e
|
||||
} else {
|
||||
b.ext = append(b.ext, e)
|
||||
}
|
||||
}
|
||||
|
||||
var errInvalidArgument = errors.New("invalid Extension or Variant")
|
||||
|
||||
func (b *builder) update(part ...interface{}) (err error) {
|
||||
replace := func(l *[]string, s string, eq func(a, b string) bool) bool {
|
||||
if s == "" {
|
||||
b.err = errInvalidArgument
|
||||
return true
|
||||
}
|
||||
for i, v := range *l {
|
||||
if eq(v, s) {
|
||||
(*l)[i] = s
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
for _, x := range part {
|
||||
switch v := x.(type) {
|
||||
case Tag:
|
||||
b.tag.lang = v.lang
|
||||
b.tag.region = v.region
|
||||
b.tag.script = v.script
|
||||
if v.str != "" {
|
||||
b.variant = nil
|
||||
for x, s := "", v.str[v.pVariant:v.pExt]; s != ""; {
|
||||
x, s = nextToken(s)
|
||||
b.variant = append(b.variant, x)
|
||||
}
|
||||
b.ext, b.private = nil, ""
|
||||
for i, e := int(v.pExt), ""; i < len(v.str); {
|
||||
i, e = getExtension(v.str, i)
|
||||
b.addExt(e)
|
||||
}
|
||||
}
|
||||
case Base:
|
||||
b.tag.lang = v.langID
|
||||
case Script:
|
||||
b.tag.script = v.scriptID
|
||||
case Region:
|
||||
b.tag.region = v.regionID
|
||||
case Variant:
|
||||
if !replace(&b.variant, v.variant, func(a, b string) bool { return a == b }) {
|
||||
b.variant = append(b.variant, v.variant)
|
||||
}
|
||||
case Extension:
|
||||
if !replace(&b.ext, v.s, func(a, b string) bool { return a[0] == b[0] }) {
|
||||
b.addExt(v.s)
|
||||
}
|
||||
case []Variant:
|
||||
b.variant = nil
|
||||
for _, x := range v {
|
||||
b.update(x)
|
||||
}
|
||||
case []Extension:
|
||||
b.ext, b.private = nil, ""
|
||||
for _, e := range v {
|
||||
b.update(e)
|
||||
}
|
||||
// TODO: support parsing of raw strings based on morphology or just extensions?
|
||||
case error:
|
||||
err = v
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// tokenLen returns the total number of bytes in the given tokens plus one
// extra byte per token.
func tokenLen(token ...string) (n int) {
	n = len(token) // one extra byte for every token
	for i := range token {
		n += len(token[i])
	}
	return n
}
|
||||
|
||||
// appendTokens writes each token into b, starting at index 0, with a '-'
// placed in front of every token. It returns the number of bytes the written
// tokens and their separators occupy.
func appendTokens(b []byte, token ...string) int {
	off := 0
	for _, tok := range token {
		b[off] = '-'
		body := b[off+1:]
		copy(body, tok)
		off += len(tok) + 1
	}
	return off
}
|
||||
|
||||
type sortVariant []string
|
||||
|
||||
func (s sortVariant) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s sortVariant) Swap(i, j int) {
|
||||
s[j], s[i] = s[i], s[j]
|
||||
}
|
||||
|
||||
func (s sortVariant) Less(i, j int) bool {
|
||||
return variantIndex[s[i]] < variantIndex[s[j]]
|
||||
}
|
||||
|
||||
// findExt returns the index of the first string in list whose first byte is
// x, or -1 if there is none. Every string in list must be non-empty.
func findExt(list []string, x byte) int {
	for i := 0; i < len(list); i++ {
		if list[i][0] == x {
			return i
		}
	}
	return -1
}
|
||||
|
||||
// getExtension returns the name, body and end position of the extension.
|
||||
func getExtension(s string, p int) (end int, ext string) {
|
||||
if s[p] == '-' {
|
||||
p++
|
||||
}
|
||||
if s[p] == 'x' {
|
||||
return len(s), s[p:]
|
||||
}
|
||||
end = nextExtension(s, p)
|
||||
return end, s[p:end]
|
||||
}
|
||||
|
||||
// nextExtension finds the next extension within the string, searching
// for the -<char>- pattern from position p. It returns the index of the
// leading '-' of that pattern, or len(s) if the pattern does not occur.
// In the vast majority of cases, language tags will have at most
// one extension and extensions tend to be small.
func nextExtension(s string, p int) int {
	limit := len(s) - 3
	for p < limit {
		if s[p] != '-' {
			p++
			continue
		}
		if s[p+2] == '-' {
			return p
		}
		// Not a singleton: jump past this separator and the two bytes after it.
		p += 3
	}
	return len(s)
}
|
||||
|
||||
// errInvalidWeight is returned by ParseAcceptLanguage when the quality weight
// of an entry cannot be parsed as a floating-point number.
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
|
||||
|
||||
// ParseAcceptLanguage parses the contents of a Accept-Language header as
|
||||
// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
|
||||
// a list of corresponding quality weights. It is more permissive than RFC 2616
|
||||
// and may return non-nil slices even if the input is not valid.
|
||||
// The Tags will be sorted by highest weight first and then by first occurrence.
|
||||
// Tags with a weight of zero will be dropped. An error will be returned if the
|
||||
// input could not be parsed.
|
||||
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
|
||||
var entry string
|
||||
for s != "" {
|
||||
if entry, s = split(s, ','); entry == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
entry, weight := split(entry, ';')
|
||||
|
||||
// Scan the language.
|
||||
t, err := Parse(entry)
|
||||
if err != nil {
|
||||
id, ok := acceptFallback[entry]
|
||||
if !ok {
|
||||
return nil, nil, err
|
||||
}
|
||||
t = Tag{lang: id}
|
||||
}
|
||||
|
||||
// Scan the optional weight.
|
||||
w := 1.0
|
||||
if weight != "" {
|
||||
weight = consume(weight, 'q')
|
||||
weight = consume(weight, '=')
|
||||
// consume returns the empty string when a token could not be
|
||||
// consumed, resulting in an error for ParseFloat.
|
||||
if w, err = strconv.ParseFloat(weight, 32); err != nil {
|
||||
return nil, nil, errInvalidWeight
|
||||
}
|
||||
// Drop tags with a quality weight of 0.
|
||||
if w <= 0 {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
tag = append(tag, t)
|
||||
q = append(q, float32(w))
|
||||
}
|
||||
sortStable(&tagSort{tag, q})
|
||||
return tag, q, nil
|
||||
}
|
||||
|
||||
// consume strips the leading byte c from s and returns what follows with
// surrounding white space trimmed. It returns the empty string when s does
// not start with c.
func consume(s string, c byte) string {
	if len(s) > 0 && s[0] == c {
		return strings.TrimSpace(s[1:])
	}
	return ""
}
|
||||
|
||||
// split cuts s at the first occurrence of c and returns the parts before and
// after it, each with surrounding white space trimmed. When c does not occur,
// head is the trimmed s and tail is empty.
func split(s string, c byte) (head, tail string) {
	i := strings.IndexByte(s, c)
	if i < 0 {
		return strings.TrimSpace(s), ""
	}
	head = strings.TrimSpace(s[:i])
	tail = strings.TrimSpace(s[i+1:])
	return head, tail
}
|
||||
|
||||
// Add hack mapping to deal with a small number of cases that that occur
|
||||
// in Accept-Language (with reasonable frequency).
|
||||
var acceptFallback = map[string]langID{
|
||||
"english": _en,
|
||||
"deutsch": _de,
|
||||
"italian": _it,
|
||||
"french": _fr,
|
||||
"*": _mul, // defined in the spec to match all languages.
|
||||
}
|
||||
|
||||
type tagSort struct {
|
||||
tag []Tag
|
||||
q []float32
|
||||
}
|
||||
|
||||
func (s *tagSort) Len() int {
|
||||
return len(s.q)
|
||||
}
|
||||
|
||||
func (s *tagSort) Less(i, j int) bool {
|
||||
return s.q[i] > s.q[j]
|
||||
}
|
||||
|
||||
func (s *tagSort) Swap(i, j int) {
|
||||
s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
|
||||
s.q[i], s.q[j] = s.q[j], s.q[i]
|
||||
}
|
3547
vendor/golang.org/x/text/language/tables.go
generated
vendored
3547
vendor/golang.org/x/text/language/tables.go
generated
vendored
File diff suppressed because it is too large
Load diff
143
vendor/golang.org/x/text/language/tags.go
generated
vendored
143
vendor/golang.org/x/text/language/tags.go
generated
vendored
|
@ -1,143 +0,0 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
// TODO: Various sets of commonly used tags and regions.
|
||||
|
||||
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
|
||||
// It simplifies safe initialization of Tag values.
|
||||
func MustParse(s string) Tag {
|
||||
t, err := Parse(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
|
||||
// It simplifies safe initialization of Tag values.
|
||||
func (c CanonType) MustParse(s string) Tag {
|
||||
t, err := c.Parse(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
|
||||
// It simplifies safe initialization of Base values.
|
||||
func MustParseBase(s string) Base {
|
||||
b, err := ParseBase(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// MustParseScript is like ParseScript, but panics if the given script cannot be
|
||||
// parsed. It simplifies safe initialization of Script values.
|
||||
func MustParseScript(s string) Script {
|
||||
scr, err := ParseScript(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return scr
|
||||
}
|
||||
|
||||
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
|
||||
// parsed. It simplifies safe initialization of Region values.
|
||||
func MustParseRegion(s string) Region {
|
||||
r, err := ParseRegion(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
var (
|
||||
und = Tag{}
|
||||
|
||||
Und Tag = Tag{}
|
||||
|
||||
Afrikaans Tag = Tag{lang: _af} // af
|
||||
Amharic Tag = Tag{lang: _am} // am
|
||||
Arabic Tag = Tag{lang: _ar} // ar
|
||||
ModernStandardArabic Tag = Tag{lang: _ar, region: _001} // ar-001
|
||||
Azerbaijani Tag = Tag{lang: _az} // az
|
||||
Bulgarian Tag = Tag{lang: _bg} // bg
|
||||
Bengali Tag = Tag{lang: _bn} // bn
|
||||
Catalan Tag = Tag{lang: _ca} // ca
|
||||
Czech Tag = Tag{lang: _cs} // cs
|
||||
Danish Tag = Tag{lang: _da} // da
|
||||
German Tag = Tag{lang: _de} // de
|
||||
Greek Tag = Tag{lang: _el} // el
|
||||
English Tag = Tag{lang: _en} // en
|
||||
AmericanEnglish Tag = Tag{lang: _en, region: _US} // en-US
|
||||
BritishEnglish Tag = Tag{lang: _en, region: _GB} // en-GB
|
||||
Spanish Tag = Tag{lang: _es} // es
|
||||
EuropeanSpanish Tag = Tag{lang: _es, region: _ES} // es-ES
|
||||
LatinAmericanSpanish Tag = Tag{lang: _es, region: _419} // es-419
|
||||
Estonian Tag = Tag{lang: _et} // et
|
||||
Persian Tag = Tag{lang: _fa} // fa
|
||||
Finnish Tag = Tag{lang: _fi} // fi
|
||||
Filipino Tag = Tag{lang: _fil} // fil
|
||||
French Tag = Tag{lang: _fr} // fr
|
||||
CanadianFrench Tag = Tag{lang: _fr, region: _CA} // fr-CA
|
||||
Gujarati Tag = Tag{lang: _gu} // gu
|
||||
Hebrew Tag = Tag{lang: _he} // he
|
||||
Hindi Tag = Tag{lang: _hi} // hi
|
||||
Croatian Tag = Tag{lang: _hr} // hr
|
||||
Hungarian Tag = Tag{lang: _hu} // hu
|
||||
Armenian Tag = Tag{lang: _hy} // hy
|
||||
Indonesian Tag = Tag{lang: _id} // id
|
||||
Icelandic Tag = Tag{lang: _is} // is
|
||||
Italian Tag = Tag{lang: _it} // it
|
||||
Japanese Tag = Tag{lang: _ja} // ja
|
||||
Georgian Tag = Tag{lang: _ka} // ka
|
||||
Kazakh Tag = Tag{lang: _kk} // kk
|
||||
Khmer Tag = Tag{lang: _km} // km
|
||||
Kannada Tag = Tag{lang: _kn} // kn
|
||||
Korean Tag = Tag{lang: _ko} // ko
|
||||
Kirghiz Tag = Tag{lang: _ky} // ky
|
||||
Lao Tag = Tag{lang: _lo} // lo
|
||||
Lithuanian Tag = Tag{lang: _lt} // lt
|
||||
Latvian Tag = Tag{lang: _lv} // lv
|
||||
Macedonian Tag = Tag{lang: _mk} // mk
|
||||
Malayalam Tag = Tag{lang: _ml} // ml
|
||||
Mongolian Tag = Tag{lang: _mn} // mn
|
||||
Marathi Tag = Tag{lang: _mr} // mr
|
||||
Malay Tag = Tag{lang: _ms} // ms
|
||||
Burmese Tag = Tag{lang: _my} // my
|
||||
Nepali Tag = Tag{lang: _ne} // ne
|
||||
Dutch Tag = Tag{lang: _nl} // nl
|
||||
Norwegian Tag = Tag{lang: _no} // no
|
||||
Punjabi Tag = Tag{lang: _pa} // pa
|
||||
Polish Tag = Tag{lang: _pl} // pl
|
||||
Portuguese Tag = Tag{lang: _pt} // pt
|
||||
BrazilianPortuguese Tag = Tag{lang: _pt, region: _BR} // pt-BR
|
||||
EuropeanPortuguese Tag = Tag{lang: _pt, region: _PT} // pt-PT
|
||||
Romanian Tag = Tag{lang: _ro} // ro
|
||||
Russian Tag = Tag{lang: _ru} // ru
|
||||
Sinhala Tag = Tag{lang: _si} // si
|
||||
Slovak Tag = Tag{lang: _sk} // sk
|
||||
Slovenian Tag = Tag{lang: _sl} // sl
|
||||
Albanian Tag = Tag{lang: _sq} // sq
|
||||
Serbian Tag = Tag{lang: _sr} // sr
|
||||
SerbianLatin Tag = Tag{lang: _sr, script: _Latn} // sr-Latn
|
||||
Swedish Tag = Tag{lang: _sv} // sv
|
||||
Swahili Tag = Tag{lang: _sw} // sw
|
||||
Tamil Tag = Tag{lang: _ta} // ta
|
||||
Telugu Tag = Tag{lang: _te} // te
|
||||
Thai Tag = Tag{lang: _th} // th
|
||||
Turkish Tag = Tag{lang: _tr} // tr
|
||||
Ukrainian Tag = Tag{lang: _uk} // uk
|
||||
Urdu Tag = Tag{lang: _ur} // ur
|
||||
Uzbek Tag = Tag{lang: _uz} // uz
|
||||
Vietnamese Tag = Tag{lang: _vi} // vi
|
||||
Chinese Tag = Tag{lang: _zh} // zh
|
||||
SimplifiedChinese Tag = Tag{lang: _zh, script: _Hans} // zh-Hans
|
||||
TraditionalChinese Tag = Tag{lang: _zh, script: _Hant} // zh-Hant
|
||||
Zulu Tag = Tag{lang: _zu} // zu
|
||||
)
|
187
vendor/golang.org/x/text/runes/cond.go
generated
vendored
187
vendor/golang.org/x/text/runes/cond.go
generated
vendored
|
@ -1,187 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package runes
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// Note: below we pass invalid UTF-8 to the tIn and tNotIn transformers as is.
|
||||
// This is done for various reasons:
|
||||
// - To retain the semantics of the Nop transformer: if input is passed to a Nop
|
||||
// one would expect it to be unchanged.
|
||||
// - It would be very expensive to pass a converted RuneError to a transformer:
|
||||
// a transformer might need more source bytes after RuneError, meaning that
|
||||
// the only way to pass it safely is to create a new buffer and manage the
|
||||
// intermingling of RuneErrors and normal input.
|
||||
// - Many transformers leave ill-formed UTF-8 as is, so this is not
|
||||
// inconsistent. Generally ill-formed UTF-8 is only replaced if it is a
|
||||
// logical consequence of the operation (as for Map) or if it otherwise would
|
||||
// pose security concerns (as for Remove).
|
||||
// - An alternative would be to return an error on ill-formed UTF-8, but this
|
||||
// would be inconsistent with other operations.
|
||||
|
||||
// If returns a transformer that applies tIn to consecutive runes for which
|
||||
// s.Contains(r) and tNotIn to consecutive runes for which !s.Contains(r). Reset
|
||||
// is called on tIn and tNotIn at the start of each run. A Nop transformer will
|
||||
// substitute a nil value passed to tIn or tNotIn. Invalid UTF-8 is translated
|
||||
// to RuneError to determine which transformer to apply, but is passed as is to
|
||||
// the respective transformer.
|
||||
func If(s Set, tIn, tNotIn transform.Transformer) Transformer {
|
||||
if tIn == nil && tNotIn == nil {
|
||||
return Transformer{transform.Nop}
|
||||
}
|
||||
if tIn == nil {
|
||||
tIn = transform.Nop
|
||||
}
|
||||
if tNotIn == nil {
|
||||
tNotIn = transform.Nop
|
||||
}
|
||||
sIn, ok := tIn.(transform.SpanningTransformer)
|
||||
if !ok {
|
||||
sIn = dummySpan{tIn}
|
||||
}
|
||||
sNotIn, ok := tNotIn.(transform.SpanningTransformer)
|
||||
if !ok {
|
||||
sNotIn = dummySpan{tNotIn}
|
||||
}
|
||||
|
||||
a := &cond{
|
||||
tIn: sIn,
|
||||
tNotIn: sNotIn,
|
||||
f: s.Contains,
|
||||
}
|
||||
a.Reset()
|
||||
return Transformer{a}
|
||||
}
|
||||
|
||||
type dummySpan struct{ transform.Transformer }
|
||||
|
||||
func (d dummySpan) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
return 0, transform.ErrEndOfSpan
|
||||
}
|
||||
|
||||
type cond struct {
|
||||
tIn, tNotIn transform.SpanningTransformer
|
||||
f func(rune) bool
|
||||
check func(rune) bool // current check to perform
|
||||
t transform.SpanningTransformer // current transformer to use
|
||||
}
|
||||
|
||||
// Reset implements transform.Transformer.
|
||||
func (t *cond) Reset() {
|
||||
t.check = t.is
|
||||
t.t = t.tIn
|
||||
t.t.Reset() // notIn will be reset on first usage.
|
||||
}
|
||||
|
||||
func (t *cond) is(r rune) bool {
|
||||
if t.f(r) {
|
||||
return true
|
||||
}
|
||||
t.check = t.isNot
|
||||
t.t = t.tNotIn
|
||||
t.tNotIn.Reset()
|
||||
return false
|
||||
}
|
||||
|
||||
func (t *cond) isNot(r rune) bool {
|
||||
if !t.f(r) {
|
||||
return true
|
||||
}
|
||||
t.check = t.is
|
||||
t.t = t.tIn
|
||||
t.tIn.Reset()
|
||||
return false
|
||||
}
|
||||
|
||||
// This implementation of Span doesn't help all too much, but it needs to be
|
||||
// there to satisfy this package's Transformer interface.
|
||||
// TODO: there are certainly room for improvements, though. For example, if
|
||||
// t.t == transform.Nop (which will a common occurrence) it will save a bundle
|
||||
// to special-case that loop.
|
||||
func (t *cond) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
p := 0
|
||||
for n < len(src) && err == nil {
|
||||
// Don't process too much at a time as the Spanner that will be
|
||||
// called on this block may terminate early.
|
||||
const maxChunk = 4096
|
||||
max := len(src)
|
||||
if v := n + maxChunk; v < max {
|
||||
max = v
|
||||
}
|
||||
atEnd := false
|
||||
size := 0
|
||||
current := t.t
|
||||
for ; p < max; p += size {
|
||||
r := rune(src[p])
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
|
||||
if !atEOF && !utf8.FullRune(src[p:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
if !t.check(r) {
|
||||
// The next rune will be the start of a new run.
|
||||
atEnd = true
|
||||
break
|
||||
}
|
||||
}
|
||||
n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src)))
|
||||
n += n2
|
||||
if err2 != nil {
|
||||
return n, err2
|
||||
}
|
||||
// At this point either err != nil or t.check will pass for the rune at p.
|
||||
p = n + size
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
p := 0
|
||||
for nSrc < len(src) && err == nil {
|
||||
// Don't process too much at a time, as the work might be wasted if the
|
||||
// destination buffer isn't large enough to hold the result or a
|
||||
// transform returns an error early.
|
||||
const maxChunk = 4096
|
||||
max := len(src)
|
||||
if n := nSrc + maxChunk; n < len(src) {
|
||||
max = n
|
||||
}
|
||||
atEnd := false
|
||||
size := 0
|
||||
current := t.t
|
||||
for ; p < max; p += size {
|
||||
r := rune(src[p])
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
|
||||
if !atEOF && !utf8.FullRune(src[p:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
if !t.check(r) {
|
||||
// The next rune will be the start of a new run.
|
||||
atEnd = true
|
||||
break
|
||||
}
|
||||
}
|
||||
nDst2, nSrc2, err2 := current.Transform(dst[nDst:], src[nSrc:p], atEnd || (atEOF && p == len(src)))
|
||||
nDst += nDst2
|
||||
nSrc += nSrc2
|
||||
if err2 != nil {
|
||||
return nDst, nSrc, err2
|
||||
}
|
||||
// At this point either err != nil or t.check will pass for the rune at p.
|
||||
p = nSrc + size
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
355
vendor/golang.org/x/text/runes/runes.go
generated
vendored
355
vendor/golang.org/x/text/runes/runes.go
generated
vendored
|
@ -1,355 +0,0 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package runes provides transforms for UTF-8 encoded text.
|
||||
package runes // import "golang.org/x/text/runes"
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// A Set is a collection of runes, defined by its membership test.
type Set interface {
	// Contains reports whether r is a member of the set.
	Contains(r rune) bool
}
|
||||
|
||||
// setFunc is a predicate on runes that has a Contains method.
type setFunc func(rune) bool

// Contains returns the result of calling the predicate on r.
func (f setFunc) Contains(r rune) bool {
	member := f(r)
	return member
}
|
||||
|
||||
// Note: using funcs here instead of wrapping types results in cleaner
|
||||
// documentation and a smaller API.
|
||||
|
||||
// In creates a Set with a Contains method that returns true for all runes in
|
||||
// the given RangeTable.
|
||||
func In(rt *unicode.RangeTable) Set {
|
||||
return setFunc(func(r rune) bool { return unicode.Is(rt, r) })
|
||||
}
|
||||
|
||||
// In creates a Set with a Contains method that returns true for all runes not
|
||||
// in the given RangeTable.
|
||||
func NotIn(rt *unicode.RangeTable) Set {
|
||||
return setFunc(func(r rune) bool { return !unicode.Is(rt, r) })
|
||||
}
|
||||
|
||||
// Predicate creates a Set with a Contains method that returns f(r).
|
||||
func Predicate(f func(rune) bool) Set {
|
||||
return setFunc(f)
|
||||
}
|
||||
|
||||
// Transformer implements the transform.Transformer interface.
|
||||
type Transformer struct {
|
||||
t transform.SpanningTransformer
|
||||
}
|
||||
|
||||
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
return t.t.Transform(dst, src, atEOF)
|
||||
}
|
||||
|
||||
func (t Transformer) Span(b []byte, atEOF bool) (n int, err error) {
|
||||
return t.t.Span(b, atEOF)
|
||||
}
|
||||
|
||||
func (t Transformer) Reset() { t.t.Reset() }
|
||||
|
||||
// Bytes returns a new byte slice with the result of converting b using t. It
|
||||
// calls Reset on t. It returns nil if any error was found. This can only happen
|
||||
// if an error-producing Transformer is passed to If.
|
||||
func (t Transformer) Bytes(b []byte) []byte {
|
||||
b, _, err := transform.Bytes(t, b)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// String returns a string with the result of converting s using t. It calls
|
||||
// Reset on t. It returns the empty string if any error was found. This can only
|
||||
// happen if an error-producing Transformer is passed to If.
|
||||
func (t Transformer) String(s string) string {
|
||||
s, _, err := transform.String(t, s)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// TODO:
|
||||
// - Copy: copying strings and bytes in whole-rune units.
|
||||
// - Validation (maybe)
|
||||
// - Well-formed-ness (maybe)
|
||||
|
||||
// runeErrorString is the UTF-8 encoding of utf8.RuneError. The transformers
// in this file write its three bytes in place of an ill-formed input byte.
const runeErrorString = string(utf8.RuneError)
|
||||
|
||||
// Remove returns a Transformer that removes runes r for which s.Contains(r).
|
||||
// Illegal input bytes are replaced by RuneError before being passed to f.
|
||||
func Remove(s Set) Transformer {
|
||||
if f, ok := s.(setFunc); ok {
|
||||
// This little trick cuts the running time of BenchmarkRemove for sets
|
||||
// created by Predicate roughly in half.
|
||||
// TODO: special-case RangeTables as well.
|
||||
return Transformer{remove(f)}
|
||||
}
|
||||
return Transformer{remove(s.Contains)}
|
||||
}
|
||||
|
||||
// TODO: remove transform.RemoveFunc.
|
||||
|
||||
type remove func(r rune) bool
|
||||
|
||||
func (remove) Reset() {}
|
||||
|
||||
// Span implements transform.Spanner.
|
||||
func (t remove) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
for r, size := rune(0), 0; n < len(src); {
|
||||
if r = rune(src[n]); r < utf8.RuneSelf {
|
||||
size = 1
|
||||
} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
|
||||
// Invalid rune.
|
||||
if !atEOF && !utf8.FullRune(src[n:]) {
|
||||
err = transform.ErrShortSrc
|
||||
} else {
|
||||
err = transform.ErrEndOfSpan
|
||||
}
|
||||
break
|
||||
}
|
||||
if t(r) {
|
||||
err = transform.ErrEndOfSpan
|
||||
break
|
||||
}
|
||||
n += size
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Transform implements transform.Transformer.
|
||||
func (t remove) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
for r, size := rune(0), 0; nSrc < len(src); {
|
||||
if r = rune(src[nSrc]); r < utf8.RuneSelf {
|
||||
size = 1
|
||||
} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
|
||||
// Invalid rune.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
// We replace illegal bytes with RuneError. Not doing so might
|
||||
// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
|
||||
// The resulting byte sequence may subsequently contain runes
|
||||
// for which t(r) is true that were passed unnoticed.
|
||||
if !t(utf8.RuneError) {
|
||||
if nDst+3 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = runeErrorString[0]
|
||||
dst[nDst+1] = runeErrorString[1]
|
||||
dst[nDst+2] = runeErrorString[2]
|
||||
nDst += 3
|
||||
}
|
||||
nSrc++
|
||||
continue
|
||||
}
|
||||
if t(r) {
|
||||
nSrc += size
|
||||
continue
|
||||
}
|
||||
if nDst+size > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
for i := 0; i < size; i++ {
|
||||
dst[nDst] = src[nSrc]
|
||||
nDst++
|
||||
nSrc++
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Map returns a Transformer that maps the runes in the input using the given
|
||||
// mapping. Illegal bytes in the input are converted to utf8.RuneError before
|
||||
// being passed to the mapping func.
|
||||
func Map(mapping func(rune) rune) Transformer {
|
||||
return Transformer{mapper(mapping)}
|
||||
}
|
||||
|
||||
type mapper func(rune) rune
|
||||
|
||||
func (mapper) Reset() {}
|
||||
|
||||
// Span implements transform.Spanner.
|
||||
func (t mapper) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
for r, size := rune(0), 0; n < len(src); n += size {
|
||||
if r = rune(src[n]); r < utf8.RuneSelf {
|
||||
size = 1
|
||||
} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
|
||||
// Invalid rune.
|
||||
if !atEOF && !utf8.FullRune(src[n:]) {
|
||||
err = transform.ErrShortSrc
|
||||
} else {
|
||||
err = transform.ErrEndOfSpan
|
||||
}
|
||||
break
|
||||
}
|
||||
if t(r) != r {
|
||||
err = transform.ErrEndOfSpan
|
||||
break
|
||||
}
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Transform implements transform.Transformer.
|
||||
func (t mapper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
var replacement rune
|
||||
var b [utf8.UTFMax]byte
|
||||
|
||||
for r, size := rune(0), 0; nSrc < len(src); {
|
||||
if r = rune(src[nSrc]); r < utf8.RuneSelf {
|
||||
if replacement = t(r); replacement < utf8.RuneSelf {
|
||||
if nDst == len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = byte(replacement)
|
||||
nDst++
|
||||
nSrc++
|
||||
continue
|
||||
}
|
||||
size = 1
|
||||
} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
|
||||
// Invalid rune.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
|
||||
if replacement = t(utf8.RuneError); replacement == utf8.RuneError {
|
||||
if nDst+3 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = runeErrorString[0]
|
||||
dst[nDst+1] = runeErrorString[1]
|
||||
dst[nDst+2] = runeErrorString[2]
|
||||
nDst += 3
|
||||
nSrc++
|
||||
continue
|
||||
}
|
||||
} else if replacement = t(r); replacement == r {
|
||||
if nDst+size > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
for i := 0; i < size; i++ {
|
||||
dst[nDst] = src[nSrc]
|
||||
nDst++
|
||||
nSrc++
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
n := utf8.EncodeRune(b[:], replacement)
|
||||
|
||||
if nDst+n > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
dst[nDst] = b[i]
|
||||
nDst++
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// ReplaceIllFormed returns a transformer that replaces all input bytes that are
|
||||
// not part of a well-formed UTF-8 code sequence with utf8.RuneError.
|
||||
func ReplaceIllFormed() Transformer {
|
||||
return Transformer{&replaceIllFormed{}}
|
||||
}
|
||||
|
||||
type replaceIllFormed struct{ transform.NopResetter }
|
||||
|
||||
func (t replaceIllFormed) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
for n < len(src) {
|
||||
// ASCII fast path.
|
||||
if src[n] < utf8.RuneSelf {
|
||||
n++
|
||||
continue
|
||||
}
|
||||
|
||||
r, size := utf8.DecodeRune(src[n:])
|
||||
|
||||
// Look for a valid non-ASCII rune.
|
||||
if r != utf8.RuneError || size != 1 {
|
||||
n += size
|
||||
continue
|
||||
}
|
||||
|
||||
// Look for short source data.
|
||||
if !atEOF && !utf8.FullRune(src[n:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
|
||||
// We have an invalid rune.
|
||||
err = transform.ErrEndOfSpan
|
||||
break
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (t replaceIllFormed) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
for nSrc < len(src) {
|
||||
// ASCII fast path.
|
||||
if r := src[nSrc]; r < utf8.RuneSelf {
|
||||
if nDst == len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = r
|
||||
nDst++
|
||||
nSrc++
|
||||
continue
|
||||
}
|
||||
|
||||
// Look for a valid non-ASCII rune.
|
||||
if _, size := utf8.DecodeRune(src[nSrc:]); size != 1 {
|
||||
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
nDst += size
|
||||
nSrc += size
|
||||
continue
|
||||
}
|
||||
|
||||
// Look for short source data.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
|
||||
// We have an invalid rune.
|
||||
if nDst+3 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = runeErrorString[0]
|
||||
dst[nDst+1] = runeErrorString[1]
|
||||
dst[nDst+2] = runeErrorString[2]
|
||||
nDst += 3
|
||||
nSrc++
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
36
vendor/golang.org/x/text/secure/precis/class.go
generated
vendored
36
vendor/golang.org/x/text/secure/precis/class.go
generated
vendored
|
@ -1,36 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package precis
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// TODO: Add contextual character rules from Appendix A of RFC5892.
|
||||
|
||||
// A class is a set of characters that match certain derived properties. The
|
||||
// PRECIS framework defines two classes: The Freeform class and the Identifier
|
||||
// class. The freeform class should be used for profiles where expressiveness is
|
||||
// prioritized over safety such as nicknames or passwords. The identifier class
|
||||
// should be used for profiles where safety is the first priority such as
|
||||
// addressable network labels and usernames.
|
||||
type class struct {
|
||||
validFrom property
|
||||
}
|
||||
|
||||
// Contains satisfies the runes.Set interface and returns whether the given rune
|
||||
// is a member of the class.
|
||||
func (c class) Contains(r rune) bool {
|
||||
b := make([]byte, 4)
|
||||
n := utf8.EncodeRune(b, r)
|
||||
|
||||
trieval, _ := dpTrie.lookup(b[:n])
|
||||
return c.validFrom <= property(trieval)
|
||||
}
|
||||
|
||||
var (
|
||||
identifier = &class{validFrom: pValid}
|
||||
freeform = &class{validFrom: idDisOrFreePVal}
|
||||
)
|
139
vendor/golang.org/x/text/secure/precis/context.go
generated
vendored
139
vendor/golang.org/x/text/secure/precis/context.go
generated
vendored
|
@ -1,139 +0,0 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package precis
|
||||
|
||||
import "errors"
|
||||
|
||||
// This file contains tables and code related to context rules.
|
||||
|
||||
// catBitmap is a set of the category bits declared below (bJapanese, ...).
type catBitmap uint16
|
||||
|
||||
const (
|
||||
// These bits, once set depending on the current value, are never unset.
|
||||
bJapanese catBitmap = 1 << iota
|
||||
bArabicIndicDigit
|
||||
bExtendedArabicIndicDigit
|
||||
|
||||
// These bits are set on each iteration depending on the current value.
|
||||
bJoinStart
|
||||
bJoinMid
|
||||
bJoinEnd
|
||||
bVirama
|
||||
bLatinSmallL
|
||||
bGreek
|
||||
bHebrew
|
||||
|
||||
// These bits indicated which of the permanent bits need to be set at the
|
||||
// end of the checks.
|
||||
bMustHaveJapn
|
||||
|
||||
permanent = bJapanese | bArabicIndicDigit | bExtendedArabicIndicDigit | bMustHaveJapn
|
||||
)
|
||||
|
||||
// finalShift equals the bit position of bMustHaveJapn (1 << 10).
// NOTE(review): its use is not visible in this part of the file — confirm how
// it relates to the permanent bits before changing either.
const finalShift = 10
|
||||
|
||||
// errContext is returned by the rule functions in categoryTransitions when a
// contextual rule is violated.
var errContext = errors.New("precis: contextual rule violated")
|
||||
|
||||
func init() {
|
||||
// Programmatically set these required bits as, manually setting them seems
|
||||
// too error prone.
|
||||
for i, ct := range categoryTransitions {
|
||||
categoryTransitions[i].keep |= permanent
|
||||
categoryTransitions[i].accept |= ct.term
|
||||
}
|
||||
}
|
||||
|
||||
var categoryTransitions = []struct {
|
||||
keep catBitmap // mask selecting which bits to keep from the previous state
|
||||
set catBitmap // mask for which bits to set for this transition
|
||||
|
||||
// These bitmaps are used for rules that require lookahead.
|
||||
// term&accept == term must be true, which is enforced programmatically.
|
||||
term catBitmap // bits accepted as termination condition
|
||||
accept catBitmap // bits that pass, but not sufficient as termination
|
||||
|
||||
// The rule function cannot take a *context as an argument, as it would
|
||||
// cause the context to escape, adding significant overhead.
|
||||
rule func(beforeBits catBitmap) (doLookahead bool, err error)
|
||||
}{
|
||||
joiningL: {set: bJoinStart},
|
||||
joiningD: {set: bJoinStart | bJoinEnd},
|
||||
joiningT: {keep: bJoinStart, set: bJoinMid},
|
||||
joiningR: {set: bJoinEnd},
|
||||
viramaModifier: {set: bVirama},
|
||||
viramaJoinT: {set: bVirama | bJoinMid},
|
||||
latinSmallL: {set: bLatinSmallL},
|
||||
greek: {set: bGreek},
|
||||
greekJoinT: {set: bGreek | bJoinMid},
|
||||
hebrew: {set: bHebrew},
|
||||
hebrewJoinT: {set: bHebrew | bJoinMid},
|
||||
japanese: {set: bJapanese},
|
||||
katakanaMiddleDot: {set: bMustHaveJapn},
|
||||
|
||||
zeroWidthNonJoiner: {
|
||||
term: bJoinEnd,
|
||||
accept: bJoinMid,
|
||||
rule: func(before catBitmap) (doLookAhead bool, err error) {
|
||||
if before&bVirama != 0 {
|
||||
return false, nil
|
||||
}
|
||||
if before&bJoinStart == 0 {
|
||||
return false, errContext
|
||||
}
|
||||
return true, nil
|
||||
},
|
||||
},
|
||||
zeroWidthJoiner: {
|
||||
rule: func(before catBitmap) (doLookAhead bool, err error) {
|
||||
if before&bVirama == 0 {
|
||||
err = errContext
|
||||
}
|
||||
return false, err
|
||||
},
|
||||
},
|
||||
middleDot: {
|
||||
term: bLatinSmallL,
|
||||
rule: func(before catBitmap) (doLookAhead bool, err error) {
|
||||
if before&bLatinSmallL == 0 {
|
||||
return false, errContext
|
||||
}
|
||||
return true, nil
|
||||
},
|
||||
},
|
||||
greekLowerNumeralSign: {
|
||||
set: bGreek,
|
||||
term: bGreek,
|
||||
rule: func(before catBitmap) (doLookAhead bool, err error) {
|
||||
return true, nil
|
||||
},
|
||||
},
|
||||
hebrewPreceding: {
|
||||
set: bHebrew,
|
||||
rule: func(before catBitmap) (doLookAhead bool, err error) {
|
||||
if before&bHebrew == 0 {
|
||||
err = errContext
|
||||
}
|
||||
return false, err
|
||||
},
|
||||
},
|
||||
arabicIndicDigit: {
|
||||
set: bArabicIndicDigit,
|
||||
rule: func(before catBitmap) (doLookAhead bool, err error) {
|
||||
if before&bExtendedArabicIndicDigit != 0 {
|
||||
err = errContext
|
||||
}
|
||||
return false, err
|
||||
},
|
||||
},
|
||||
extendedArabicIndicDigit: {
|
||||
set: bExtendedArabicIndicDigit,
|
||||
rule: func(before catBitmap) (doLookAhead bool, err error) {
|
||||
if before&bArabicIndicDigit != 0 {
|
||||
err = errContext
|
||||
}
|
||||
return false, err
|
||||
},
|
||||
},
|
||||
}
|
14
vendor/golang.org/x/text/secure/precis/doc.go
generated
vendored
14
vendor/golang.org/x/text/secure/precis/doc.go
generated
vendored
|
@ -1,14 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package precis contains types and functions for the preparation,
|
||||
// enforcement, and comparison of internationalized strings ("PRECIS") as
|
||||
// defined in RFC 7564. It also contains several pre-defined profiles for
|
||||
// passwords, nicknames, and usernames as defined in RFC 7613 and RFC 7700.
|
||||
//
|
||||
// BE ADVISED: This package is under construction and the API may change in
|
||||
// backwards incompatible ways and without notice.
|
||||
package precis // import "golang.org/x/text/secure/precis"
|
||||
|
||||
//go:generate go run gen.go gen_trieval.go
|
310
vendor/golang.org/x/text/secure/precis/gen.go
generated
vendored
310
vendor/golang.org/x/text/secure/precis/gen.go
generated
vendored
|
@ -1,310 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Unicode table generator.
|
||||
// Data read from the web.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"log"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/triegen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
"golang.org/x/text/unicode/rangetable"
|
||||
)
|
||||
|
||||
var outputFile = flag.String("output", "tables.go", "output file for generated tables; default tables.go")
|
||||
|
||||
var assigned, disallowedRunes *unicode.RangeTable
|
||||
|
||||
var runeCategory = map[rune]category{}
|
||||
|
||||
var overrides = map[category]category{
|
||||
viramaModifier: viramaJoinT,
|
||||
greek: greekJoinT,
|
||||
hebrew: hebrewJoinT,
|
||||
}
|
||||
|
||||
func setCategory(r rune, cat category) {
|
||||
if c, ok := runeCategory[r]; ok {
|
||||
if override, ok := overrides[c]; cat == joiningT && ok {
|
||||
cat = override
|
||||
} else {
|
||||
log.Fatalf("%U: multiple categories for rune (%v and %v)", r, c, cat)
|
||||
}
|
||||
}
|
||||
runeCategory[r] = cat
|
||||
}
|
||||
|
||||
func init() {
|
||||
if numCategories > 1<<propShift {
|
||||
log.Fatalf("Number of categories is %d; may at most be %d", numCategories, 1<<propShift)
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
// Load data
|
||||
runes := []rune{}
|
||||
// PrecisIgnorableProperties: https://tools.ietf.org/html/rfc7564#section-9.13
|
||||
ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
|
||||
if p.String(1) == "Default_Ignorable_Code_Point" {
|
||||
runes = append(runes, p.Rune(0))
|
||||
}
|
||||
})
|
||||
ucd.Parse(gen.OpenUCDFile("PropList.txt"), func(p *ucd.Parser) {
|
||||
switch p.String(1) {
|
||||
case "Noncharacter_Code_Point":
|
||||
runes = append(runes, p.Rune(0))
|
||||
}
|
||||
})
|
||||
// OldHangulJamo: https://tools.ietf.org/html/rfc5892#section-2.9
|
||||
ucd.Parse(gen.OpenUCDFile("HangulSyllableType.txt"), func(p *ucd.Parser) {
|
||||
switch p.String(1) {
|
||||
case "L", "V", "T":
|
||||
runes = append(runes, p.Rune(0))
|
||||
}
|
||||
})
|
||||
|
||||
disallowedRunes = rangetable.New(runes...)
|
||||
assigned = rangetable.Assigned(unicode.Version)
|
||||
|
||||
// Load category data.
|
||||
runeCategory['l'] = latinSmallL
|
||||
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
|
||||
const cccVirama = 9
|
||||
if p.Int(ucd.CanonicalCombiningClass) == cccVirama {
|
||||
setCategory(p.Rune(0), viramaModifier)
|
||||
}
|
||||
})
|
||||
ucd.Parse(gen.OpenUCDFile("Scripts.txt"), func(p *ucd.Parser) {
|
||||
switch p.String(1) {
|
||||
case "Greek":
|
||||
setCategory(p.Rune(0), greek)
|
||||
case "Hebrew":
|
||||
setCategory(p.Rune(0), hebrew)
|
||||
case "Hiragana", "Katakana", "Han":
|
||||
setCategory(p.Rune(0), japanese)
|
||||
}
|
||||
})
|
||||
|
||||
// Set the rule categories associated with exceptions. This overrides any
|
||||
// previously set categories. The original categories are manually
|
||||
// reintroduced in the categoryTransitions table.
|
||||
for r, e := range exceptions {
|
||||
if e.cat != 0 {
|
||||
runeCategory[r] = e.cat
|
||||
}
|
||||
}
|
||||
cat := map[string]category{
|
||||
"L": joiningL,
|
||||
"D": joiningD,
|
||||
"T": joiningT,
|
||||
|
||||
"R": joiningR,
|
||||
}
|
||||
ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
|
||||
switch v := p.String(1); v {
|
||||
case "L", "D", "T", "R":
|
||||
setCategory(p.Rune(0), cat[v])
|
||||
}
|
||||
})
|
||||
|
||||
writeTables()
|
||||
gen.Repackage("gen_trieval.go", "trieval.go", "precis")
|
||||
}
|
||||
|
||||
type exception struct {
|
||||
prop property
|
||||
cat category
|
||||
}
|
||||
|
||||
func init() {
|
||||
// Programmatically add the Arabic and Indic digits to the exceptions map.
|
||||
// See comment in the exceptions map below why these are marked disallowed.
|
||||
for i := rune(0); i <= 9; i++ {
|
||||
exceptions[0x0660+i] = exception{
|
||||
prop: disallowed,
|
||||
cat: arabicIndicDigit,
|
||||
}
|
||||
exceptions[0x06F0+i] = exception{
|
||||
prop: disallowed,
|
||||
cat: extendedArabicIndicDigit,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The Exceptions class as defined in RFC 5892
|
||||
// https://tools.ietf.org/html/rfc5892#section-2.6
|
||||
var exceptions = map[rune]exception{
|
||||
0x00DF: {prop: pValid},
|
||||
0x03C2: {prop: pValid},
|
||||
0x06FD: {prop: pValid},
|
||||
0x06FE: {prop: pValid},
|
||||
0x0F0B: {prop: pValid},
|
||||
0x3007: {prop: pValid},
|
||||
|
||||
// ContextO|J rules are marked as disallowed, taking a "guilty until proven
|
||||
// innocent" approach. The main reason for this is that the check for
|
||||
// whether a context rule should be applied can be moved to the logic for
|
||||
// handing disallowed runes, taken it off the common path. The exception to
|
||||
// this rule is for katakanaMiddleDot, as the rule logic is handled without
|
||||
// using a rule function.
|
||||
|
||||
// ContextJ (Join control)
|
||||
0x200C: {prop: disallowed, cat: zeroWidthNonJoiner},
|
||||
0x200D: {prop: disallowed, cat: zeroWidthJoiner},
|
||||
|
||||
// ContextO
|
||||
0x00B7: {prop: disallowed, cat: middleDot},
|
||||
0x0375: {prop: disallowed, cat: greekLowerNumeralSign},
|
||||
0x05F3: {prop: disallowed, cat: hebrewPreceding}, // punctuation Geresh
|
||||
0x05F4: {prop: disallowed, cat: hebrewPreceding}, // punctuation Gershayim
|
||||
0x30FB: {prop: pValid, cat: katakanaMiddleDot},
|
||||
|
||||
// These are officially ContextO, but the implementation does not require
|
||||
// special treatment of these, so we simply mark them as valid.
|
||||
0x0660: {prop: pValid},
|
||||
0x0661: {prop: pValid},
|
||||
0x0662: {prop: pValid},
|
||||
0x0663: {prop: pValid},
|
||||
0x0664: {prop: pValid},
|
||||
0x0665: {prop: pValid},
|
||||
0x0666: {prop: pValid},
|
||||
0x0667: {prop: pValid},
|
||||
0x0668: {prop: pValid},
|
||||
0x0669: {prop: pValid},
|
||||
0x06F0: {prop: pValid},
|
||||
0x06F1: {prop: pValid},
|
||||
0x06F2: {prop: pValid},
|
||||
0x06F3: {prop: pValid},
|
||||
0x06F4: {prop: pValid},
|
||||
0x06F5: {prop: pValid},
|
||||
0x06F6: {prop: pValid},
|
||||
0x06F7: {prop: pValid},
|
||||
0x06F8: {prop: pValid},
|
||||
0x06F9: {prop: pValid},
|
||||
|
||||
0x0640: {prop: disallowed},
|
||||
0x07FA: {prop: disallowed},
|
||||
0x302E: {prop: disallowed},
|
||||
0x302F: {prop: disallowed},
|
||||
0x3031: {prop: disallowed},
|
||||
0x3032: {prop: disallowed},
|
||||
0x3033: {prop: disallowed},
|
||||
0x3034: {prop: disallowed},
|
||||
0x3035: {prop: disallowed},
|
||||
0x303B: {prop: disallowed},
|
||||
}
|
||||
|
||||
// isLetterDigits reports whether r belongs to the LetterDigits class
// (https://tools.ietf.org/html/rfc5892#section-2.1), i.e. whether its general
// category is one of {Ll, Lu, Lo, Nd, Lm, Mn, Mc}.
func isLetterDigits(r rune) bool {
	letterDigits := [...]*unicode.RangeTable{
		unicode.Ll, unicode.Lu, unicode.Lm, unicode.Lo, // Letters
		unicode.Mn, unicode.Mc, // Modifiers
		unicode.Nd, // Digits
	}
	for _, table := range letterDigits {
		if unicode.Is(table, r) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// isIdDisAndFreePVal reports whether r belongs to one of the general
// categories listed below: OtherLetterDigits, Spaces, Symbols or Punctuation.
func isIdDisAndFreePVal(r rune) bool {
	classes := [...]*unicode.RangeTable{
		// OtherLetterDigits: https://tools.ietf.org/html/rfc7564#section-9.18
		// r in {Lt, Nl, No, Me}
		unicode.Lt, unicode.Nl, unicode.No, // Other letters / numbers
		unicode.Me, // Modifiers

		// Spaces: https://tools.ietf.org/html/rfc7564#section-9.14
		// r in {Zs}
		unicode.Zs,

		// Symbols: https://tools.ietf.org/html/rfc7564#section-9.15
		// r in {Sm, Sc, Sk, So}
		unicode.Sm, unicode.Sc, unicode.Sk, unicode.So,

		// Punctuation: https://tools.ietf.org/html/rfc7564#section-9.16
		// r in {Pc, Pd, Ps, Pe, Pi, Pf, Po}
		unicode.Pc, unicode.Pd, unicode.Ps, unicode.Pe,
		unicode.Pi, unicode.Pf, unicode.Po,
	}
	for _, table := range classes {
		if unicode.Is(table, r) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// HasCompat: https://tools.ietf.org/html/rfc7564#section-9.17
|
||||
func hasCompat(r rune) bool {
|
||||
return !norm.NFKC.IsNormalString(string(r))
|
||||
}
|
||||
|
||||
// From https://tools.ietf.org/html/rfc7564#section-8 (derived from RFC 5892):
|
||||
//
|
||||
// If .cp. .in. Exceptions Then Exceptions(cp);
|
||||
// Else If .cp. .in. BackwardCompatible Then BackwardCompatible(cp);
|
||||
// Else If .cp. .in. Unassigned Then UNASSIGNED;
|
||||
// Else If .cp. .in. ASCII7 Then PVALID;
|
||||
// Else If .cp. .in. JoinControl Then CONTEXTJ;
|
||||
// Else If .cp. .in. OldHangulJamo Then DISALLOWED;
|
||||
// Else If .cp. .in. PrecisIgnorableProperties Then DISALLOWED;
|
||||
// Else If .cp. .in. Controls Then DISALLOWED;
|
||||
// Else If .cp. .in. HasCompat Then ID_DIS or FREE_PVAL;
|
||||
// Else If .cp. .in. LetterDigits Then PVALID;
|
||||
// Else If .cp. .in. OtherLetterDigits Then ID_DIS or FREE_PVAL;
|
||||
// Else If .cp. .in. Spaces Then ID_DIS or FREE_PVAL;
|
||||
// Else If .cp. .in. Symbols Then ID_DIS or FREE_PVAL;
|
||||
// Else If .cp. .in. Punctuation Then ID_DIS or FREE_PVAL;
|
||||
// Else DISALLOWED;
|
||||
|
||||
func writeTables() {
|
||||
propTrie := triegen.NewTrie("derivedProperties")
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile(*outputFile, "precis")
|
||||
gen.WriteUnicodeVersion(w)
|
||||
|
||||
// Iterate over all the runes...
|
||||
for i := rune(0); i < unicode.MaxRune; i++ {
|
||||
r := rune(i)
|
||||
|
||||
if !utf8.ValidRune(r) {
|
||||
continue
|
||||
}
|
||||
|
||||
e, ok := exceptions[i]
|
||||
p := e.prop
|
||||
switch {
|
||||
case ok:
|
||||
case !unicode.In(r, assigned):
|
||||
p = unassigned
|
||||
case r >= 0x0021 && r <= 0x007e: // Is ASCII 7
|
||||
p = pValid
|
||||
case unicode.In(r, disallowedRunes, unicode.Cc):
|
||||
p = disallowed
|
||||
case hasCompat(r):
|
||||
p = idDisOrFreePVal
|
||||
case isLetterDigits(r):
|
||||
p = pValid
|
||||
case isIdDisAndFreePVal(r):
|
||||
p = idDisOrFreePVal
|
||||
default:
|
||||
p = disallowed
|
||||
}
|
||||
cat := runeCategory[r]
|
||||
// Don't set category for runes that are disallowed.
|
||||
if p == disallowed {
|
||||
cat = exceptions[r].cat
|
||||
}
|
||||
propTrie.Insert(r, uint64(p)|uint64(cat))
|
||||
}
|
||||
sz, err := propTrie.Gen(w)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
w.Size += sz
|
||||
}
|
68
vendor/golang.org/x/text/secure/precis/gen_trieval.go
generated
vendored
68
vendor/golang.org/x/text/secure/precis/gen_trieval.go
generated
vendored
|
@ -1,68 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// entry is the entry of a trie table
// 7..6   property (unassigned, disallowed, maybe, valid)
// 5..0   category
type entry uint8

const (
	propShift = 6    // bit position of the lowest property bit
	propMask  = 0xc0 // selects the property bits of an entry
	catMask   = 0x3f // selects the category bits of an entry
)

// property returns the property bits of e, still in their shifted position so
// that they compare directly against the property constants.
func (e entry) property() property { return property(e & propMask) }

// category returns the category stored in the low bits of e.
func (e entry) category() category { return category(e & catMask) }

// property is the derived property of a rune, stored pre-shifted by propShift.
type property uint8

// The order of these constants matter. A Profile may consider runes to be
// allowed either from pValid or idDisOrFreePVal.
const (
	unassigned property = iota << propShift
	disallowed
	idDisOrFreePVal // disallowed for Identifier, pValid for FreeForm
	pValid
)

// category identifies the special rune type, if any, that a rune belongs to
// for the purpose of the contextual rules.
type category uint8

const (
	other category = iota

	// Special rune types
	joiningL
	joiningD
	joiningT
	joiningR
	viramaModifier
	viramaJoinT // Virama + JoiningT
	latinSmallL // U+006c
	greek
	greekJoinT // Greek + JoiningT
	hebrew
	hebrewJoinT // Hebrew + JoiningT
	japanese    // hiragana, katakana, han

	// Special rune types associated with contextual rules defined in
	// https://tools.ietf.org/html/rfc5892#appendix-A.
	// ContextJ
	zeroWidthNonJoiner // rule 1
	zeroWidthJoiner    // rule 2
	// ContextO
	middleDot                // rule 3
	greekLowerNumeralSign    // rule 4
	hebrewPreceding          // rule 5 and 6
	katakanaMiddleDot        // rule 7
	arabicIndicDigit         // rule 8
	extendedArabicIndicDigit // rule 9

	numCategories
)
|
70
vendor/golang.org/x/text/secure/precis/nickname.go
generated
vendored
70
vendor/golang.org/x/text/secure/precis/nickname.go
generated
vendored
|
@ -1,70 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package precis
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// nickAdditionalMapping is the transformer implementing the additional
// mapping rule for nicknames; it carries state between Transform calls.
type nickAdditionalMapping struct {
	// TODO: This transformer needs to be stateless somehow…

	// notStart is set once a non-space rune has been written.
	notStart bool
	// prevSpace is set while the most recently read rune was a space.
	prevSpace bool
}
|
||||
|
||||
func (t *nickAdditionalMapping) Reset() {
|
||||
t.prevSpace = false
|
||||
t.notStart = false
|
||||
}
|
||||
|
||||
func (t *nickAdditionalMapping) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
// RFC 7700 §2.1. Rules
|
||||
//
|
||||
// 2. Additional Mapping Rule: The additional mapping rule consists of
|
||||
// the following sub-rules.
|
||||
//
|
||||
// 1. Any instances of non-ASCII space MUST be mapped to ASCII
|
||||
// space (U+0020); a non-ASCII space is any Unicode code point
|
||||
// having a general category of "Zs", naturally with the
|
||||
// exception of U+0020.
|
||||
//
|
||||
// 2. Any instances of the ASCII space character at the beginning
|
||||
// or end of a nickname MUST be removed (e.g., "stpeter " is
|
||||
// mapped to "stpeter").
|
||||
//
|
||||
// 3. Interior sequences of more than one ASCII space character
|
||||
// MUST be mapped to a single ASCII space character (e.g.,
|
||||
// "St Peter" is mapped to "St Peter").
|
||||
|
||||
for nSrc < len(src) {
|
||||
r, size := utf8.DecodeRune(src[nSrc:])
|
||||
if size == 0 { // Incomplete UTF-8 encoding
|
||||
if !atEOF {
|
||||
return nDst, nSrc, transform.ErrShortSrc
|
||||
}
|
||||
size = 1
|
||||
}
|
||||
if unicode.Is(unicode.Zs, r) {
|
||||
t.prevSpace = true
|
||||
} else {
|
||||
if t.prevSpace && t.notStart {
|
||||
dst[nDst] = ' '
|
||||
nDst += 1
|
||||
}
|
||||
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||
nDst += size
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
nDst += size
|
||||
t.prevSpace = false
|
||||
t.notStart = true
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
return nDst, nSrc, nil
|
||||
}
|
153
vendor/golang.org/x/text/secure/precis/options.go
generated
vendored
153
vendor/golang.org/x/text/secure/precis/options.go
generated
vendored
|
@ -1,153 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package precis
|
||||
|
||||
import (
|
||||
"golang.org/x/text/cases"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/runes"
|
||||
"golang.org/x/text/transform"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// An Option is used to define the behavior and rules of a Profile.
|
||||
type Option func(*options)
|
||||
|
||||
type options struct {
|
||||
// Preparation options
|
||||
foldWidth bool
|
||||
|
||||
// Enforcement options
|
||||
asciiLower bool
|
||||
cases transform.SpanningTransformer
|
||||
disallow runes.Set
|
||||
norm transform.SpanningTransformer
|
||||
additional []func() transform.SpanningTransformer
|
||||
width transform.SpanningTransformer
|
||||
disallowEmpty bool
|
||||
bidiRule bool
|
||||
|
||||
// Comparison options
|
||||
ignorecase bool
|
||||
}
|
||||
|
||||
func getOpts(o ...Option) (res options) {
|
||||
for _, f := range o {
|
||||
f(&res)
|
||||
}
|
||||
// Using a SpanningTransformer, instead of norm.Form prevents an allocation
|
||||
// down the road.
|
||||
if res.norm == nil {
|
||||
res.norm = norm.NFC
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
var (
|
||||
// The IgnoreCase option causes the profile to perform a case insensitive
|
||||
// comparison during the PRECIS comparison step.
|
||||
IgnoreCase Option = ignoreCase
|
||||
|
||||
// The FoldWidth option causes the profile to map non-canonical wide and
|
||||
// narrow variants to their decomposition mapping. This is useful for
|
||||
// profiles that are based on the identifier class which would otherwise
|
||||
// disallow such characters.
|
||||
FoldWidth Option = foldWidth
|
||||
|
||||
// The DisallowEmpty option causes the enforcement step to return an error if
|
||||
// the resulting string would be empty.
|
||||
DisallowEmpty Option = disallowEmpty
|
||||
|
||||
// The BidiRule option causes the Bidi Rule defined in RFC 5893 to be
|
||||
// applied.
|
||||
BidiRule Option = bidiRule
|
||||
)
|
||||
|
||||
var (
|
||||
ignoreCase = func(o *options) {
|
||||
o.ignorecase = true
|
||||
}
|
||||
foldWidth = func(o *options) {
|
||||
o.foldWidth = true
|
||||
}
|
||||
disallowEmpty = func(o *options) {
|
||||
o.disallowEmpty = true
|
||||
}
|
||||
bidiRule = func(o *options) {
|
||||
o.bidiRule = true
|
||||
}
|
||||
)
|
||||
|
||||
// TODO: move this logic to package transform
|
||||
|
||||
type spanWrap struct{ transform.Transformer }
|
||||
|
||||
func (s spanWrap) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
return 0, transform.ErrEndOfSpan
|
||||
}
|
||||
|
||||
// TODO: allow different types? For instance:
|
||||
// func() transform.Transformer
|
||||
// func() transform.SpanningTransformer
|
||||
// func([]byte) bool // validation only
|
||||
//
|
||||
// Also, would be great if we could detect if a transformer is reentrant.
|
||||
|
||||
// The AdditionalMapping option defines the additional mapping rule for the
|
||||
// Profile by applying Transformer's in sequence.
|
||||
func AdditionalMapping(t ...func() transform.Transformer) Option {
|
||||
return func(o *options) {
|
||||
for _, f := range t {
|
||||
sf := func() transform.SpanningTransformer {
|
||||
return f().(transform.SpanningTransformer)
|
||||
}
|
||||
if _, ok := f().(transform.SpanningTransformer); !ok {
|
||||
sf = func() transform.SpanningTransformer {
|
||||
return spanWrap{f()}
|
||||
}
|
||||
}
|
||||
o.additional = append(o.additional, sf)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The Norm option defines a Profile's normalization rule. Defaults to NFC.
|
||||
func Norm(f norm.Form) Option {
|
||||
return func(o *options) {
|
||||
o.norm = f
|
||||
}
|
||||
}
|
||||
|
||||
// The FoldCase option defines a Profile's case mapping rule. Options can be
|
||||
// provided to determine the type of case folding used.
|
||||
func FoldCase(opts ...cases.Option) Option {
|
||||
return func(o *options) {
|
||||
o.asciiLower = true
|
||||
o.cases = cases.Fold(opts...)
|
||||
}
|
||||
}
|
||||
|
||||
// The LowerCase option defines a Profile's case mapping rule. Options can be
|
||||
// provided to determine the type of case folding used.
|
||||
func LowerCase(opts ...cases.Option) Option {
|
||||
return func(o *options) {
|
||||
o.asciiLower = true
|
||||
if len(opts) == 0 {
|
||||
o.cases = cases.Lower(language.Und, cases.HandleFinalSigma(false))
|
||||
return
|
||||
}
|
||||
|
||||
opts = append([]cases.Option{cases.HandleFinalSigma(false)}, opts...)
|
||||
o.cases = cases.Lower(language.Und, opts...)
|
||||
}
|
||||
}
|
||||
|
||||
// The Disallow option further restricts a Profile's allowed characters beyond
|
||||
// what is disallowed by the underlying string class.
|
||||
func Disallow(set runes.Set) Option {
|
||||
return func(o *options) {
|
||||
o.disallow = set
|
||||
}
|
||||
}
|
388
vendor/golang.org/x/text/secure/precis/profile.go
generated
vendored
388
vendor/golang.org/x/text/secure/precis/profile.go
generated
vendored
|
@ -1,388 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package precis
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/cases"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/runes"
|
||||
"golang.org/x/text/secure/bidirule"
|
||||
"golang.org/x/text/transform"
|
||||
"golang.org/x/text/width"
|
||||
)
|
||||
|
||||
var (
|
||||
errDisallowedRune = errors.New("precis: disallowed rune encountered")
|
||||
)
|
||||
|
||||
var dpTrie = newDerivedPropertiesTrie(0)
|
||||
|
||||
// A Profile represents a set of rules for normalizing and validating strings in
|
||||
// the PRECIS framework.
|
||||
type Profile struct {
|
||||
options
|
||||
class *class
|
||||
}
|
||||
|
||||
// NewIdentifier creates a new PRECIS profile based on the Identifier string
|
||||
// class. Profiles created from this class are suitable for use where safety is
|
||||
// prioritized over expressiveness like network identifiers, user accounts, chat
|
||||
// rooms, and file names.
|
||||
func NewIdentifier(opts ...Option) *Profile {
|
||||
return &Profile{
|
||||
options: getOpts(opts...),
|
||||
class: identifier,
|
||||
}
|
||||
}
|
||||
|
||||
// NewFreeform creates a new PRECIS profile based on the Freeform string class.
|
||||
// Profiles created from this class are suitable for use where expressiveness is
|
||||
// prioritized over safety like passwords, and display-elements such as
|
||||
// nicknames in a chat room.
|
||||
func NewFreeform(opts ...Option) *Profile {
|
||||
return &Profile{
|
||||
options: getOpts(opts...),
|
||||
class: freeform,
|
||||
}
|
||||
}
|
||||
|
||||
// NewTransformer creates a new transform.Transformer that performs the PRECIS
|
||||
// preparation and enforcement steps on the given UTF-8 encoded bytes.
|
||||
func (p *Profile) NewTransformer() *Transformer {
|
||||
var ts []transform.Transformer
|
||||
|
||||
// These transforms are applied in the order defined in
|
||||
// https://tools.ietf.org/html/rfc7564#section-7
|
||||
|
||||
if p.options.foldWidth {
|
||||
ts = append(ts, width.Fold)
|
||||
}
|
||||
|
||||
for _, f := range p.options.additional {
|
||||
ts = append(ts, f())
|
||||
}
|
||||
|
||||
if p.options.cases != nil {
|
||||
ts = append(ts, p.options.cases)
|
||||
}
|
||||
|
||||
ts = append(ts, p.options.norm)
|
||||
|
||||
if p.options.bidiRule {
|
||||
ts = append(ts, bidirule.New())
|
||||
}
|
||||
|
||||
ts = append(ts, &checker{p: p, allowed: p.Allowed()})
|
||||
|
||||
// TODO: Add the disallow empty rule with a dummy transformer?
|
||||
|
||||
return &Transformer{transform.Chain(ts...)}
|
||||
}
|
||||
|
||||
var errEmptyString = errors.New("precis: transformation resulted in empty string")
|
||||
|
||||
// buffers holds the text being processed by enforce together with two scratch
// buffers that successive transformations alternate between.
type buffers struct {
	src  []byte    // current text; initially the slice passed to enforce
	buf  [2][]byte // scratch buffers, selected by next&1
	next int       // number of transformations that produced new output
}
|
||||
|
||||
func (b *buffers) apply(t transform.SpanningTransformer) (err error) {
|
||||
n, err := t.Span(b.src, true)
|
||||
if err != transform.ErrEndOfSpan {
|
||||
return err
|
||||
}
|
||||
x := b.next & 1
|
||||
if b.buf[x] == nil {
|
||||
b.buf[x] = make([]byte, 0, 8+len(b.src)+len(b.src)>>2)
|
||||
}
|
||||
span := append(b.buf[x][:0], b.src[:n]...)
|
||||
b.src, _, err = transform.Append(t, span, b.src[n:])
|
||||
b.buf[x] = b.src
|
||||
b.next++
|
||||
return err
|
||||
}
|
||||
|
||||
// Pre-allocate transformers when possible. In some cases this avoids allocation.
|
||||
var (
|
||||
foldWidthT transform.SpanningTransformer = width.Fold
|
||||
lowerCaseT transform.SpanningTransformer = cases.Lower(language.Und, cases.HandleFinalSigma(false))
|
||||
)
|
||||
|
||||
// TODO: make this a method on profile.
|
||||
|
||||
func (b *buffers) enforce(p *Profile, src []byte, comparing bool) (str []byte, err error) {
|
||||
b.src = src
|
||||
|
||||
ascii := true
|
||||
for _, c := range src {
|
||||
if c >= utf8.RuneSelf {
|
||||
ascii = false
|
||||
break
|
||||
}
|
||||
}
|
||||
// ASCII fast path.
|
||||
if ascii {
|
||||
for _, f := range p.options.additional {
|
||||
if err = b.apply(f()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
switch {
|
||||
case p.options.asciiLower || (comparing && p.options.ignorecase):
|
||||
for i, c := range b.src {
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
b.src[i] = c ^ 1<<5
|
||||
}
|
||||
}
|
||||
case p.options.cases != nil:
|
||||
b.apply(p.options.cases)
|
||||
}
|
||||
c := checker{p: p}
|
||||
if _, err := c.span(b.src, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if p.disallow != nil {
|
||||
for _, c := range b.src {
|
||||
if p.disallow.Contains(rune(c)) {
|
||||
return nil, errDisallowedRune
|
||||
}
|
||||
}
|
||||
}
|
||||
if p.options.disallowEmpty && len(b.src) == 0 {
|
||||
return nil, errEmptyString
|
||||
}
|
||||
return b.src, nil
|
||||
}
|
||||
|
||||
// These transforms are applied in the order defined in
|
||||
// https://tools.ietf.org/html/rfc7564#section-7
|
||||
|
||||
// TODO: allow different width transforms options.
|
||||
if p.options.foldWidth || (p.options.ignorecase && comparing) {
|
||||
b.apply(foldWidthT)
|
||||
}
|
||||
for _, f := range p.options.additional {
|
||||
if err = b.apply(f()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if p.options.cases != nil {
|
||||
b.apply(p.options.cases)
|
||||
}
|
||||
if comparing && p.options.ignorecase {
|
||||
b.apply(lowerCaseT)
|
||||
}
|
||||
b.apply(p.norm)
|
||||
if p.options.bidiRule && !bidirule.Valid(b.src) {
|
||||
return nil, bidirule.ErrInvalid
|
||||
}
|
||||
c := checker{p: p}
|
||||
if _, err := c.span(b.src, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if p.disallow != nil {
|
||||
for i := 0; i < len(b.src); {
|
||||
r, size := utf8.DecodeRune(b.src[i:])
|
||||
if p.disallow.Contains(r) {
|
||||
return nil, errDisallowedRune
|
||||
}
|
||||
i += size
|
||||
}
|
||||
}
|
||||
if p.options.disallowEmpty && len(b.src) == 0 {
|
||||
return nil, errEmptyString
|
||||
}
|
||||
return b.src, nil
|
||||
}
|
||||
|
||||
// Append appends the result of applying p to src writing the result to dst.
|
||||
// It returns an error if the input string is invalid.
|
||||
func (p *Profile) Append(dst, src []byte) ([]byte, error) {
|
||||
var buf buffers
|
||||
b, err := buf.enforce(p, src, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return append(dst, b...), nil
|
||||
}
|
||||
|
||||
func processBytes(p *Profile, b []byte, key bool) ([]byte, error) {
|
||||
var buf buffers
|
||||
b, err := buf.enforce(p, b, key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if buf.next == 0 {
|
||||
c := make([]byte, len(b))
|
||||
copy(c, b)
|
||||
return c, nil
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// Bytes returns a new byte slice with the result of applying the profile to b.
|
||||
func (p *Profile) Bytes(b []byte) ([]byte, error) {
|
||||
return processBytes(p, b, false)
|
||||
}
|
||||
|
||||
// AppendCompareKey appends the result of applying p to src (including any
|
||||
// optional rules to make strings comparable or useful in a map key such as
|
||||
// applying lowercasing) writing the result to dst. It returns an error if the
|
||||
// input string is invalid.
|
||||
func (p *Profile) AppendCompareKey(dst, src []byte) ([]byte, error) {
|
||||
var buf buffers
|
||||
b, err := buf.enforce(p, src, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return append(dst, b...), nil
|
||||
}
|
||||
|
||||
func processString(p *Profile, s string, key bool) (string, error) {
|
||||
var buf buffers
|
||||
b, err := buf.enforce(p, []byte(s), key)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(b), nil
|
||||
}
|
||||
|
||||
// String returns a string with the result of applying the profile to s.
|
||||
func (p *Profile) String(s string) (string, error) {
|
||||
return processString(p, s, false)
|
||||
}
|
||||
|
||||
// CompareKey returns a string that can be used for comparison, hashing, or
|
||||
// collation.
|
||||
func (p *Profile) CompareKey(s string) (string, error) {
|
||||
return processString(p, s, true)
|
||||
}
|
||||
|
||||
// Compare enforces both strings, and then compares them for bit-string identity
|
||||
// (byte-for-byte equality). If either string cannot be enforced, the comparison
|
||||
// is false.
|
||||
func (p *Profile) Compare(a, b string) bool {
|
||||
var buf buffers
|
||||
|
||||
akey, err := buf.enforce(p, []byte(a), true)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
buf = buffers{}
|
||||
bkey, err := buf.enforce(p, []byte(b), true)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Compare(akey, bkey) == 0
|
||||
}
|
||||
|
||||
// Allowed returns a runes.Set containing every rune that is a member of the
|
||||
// underlying profile's string class and not disallowed by any profile specific
|
||||
// rules.
|
||||
func (p *Profile) Allowed() runes.Set {
|
||||
if p.options.disallow != nil {
|
||||
return runes.Predicate(func(r rune) bool {
|
||||
return p.class.Contains(r) && !p.options.disallow.Contains(r)
|
||||
})
|
||||
}
|
||||
return p.class
|
||||
}
|
||||
|
||||
type checker struct {
|
||||
p *Profile
|
||||
allowed runes.Set
|
||||
|
||||
beforeBits catBitmap
|
||||
termBits catBitmap
|
||||
acceptBits catBitmap
|
||||
}
|
||||
|
||||
func (c *checker) Reset() {
|
||||
c.beforeBits = 0
|
||||
c.termBits = 0
|
||||
c.acceptBits = 0
|
||||
}
|
||||
|
||||
func (c *checker) span(src []byte, atEOF bool) (n int, err error) {
|
||||
for n < len(src) {
|
||||
e, sz := dpTrie.lookup(src[n:])
|
||||
d := categoryTransitions[category(e&catMask)]
|
||||
if sz == 0 {
|
||||
if !atEOF {
|
||||
return n, transform.ErrShortSrc
|
||||
}
|
||||
return n, errDisallowedRune
|
||||
}
|
||||
if property(e) < c.p.class.validFrom {
|
||||
if d.rule == nil {
|
||||
return n, errDisallowedRune
|
||||
}
|
||||
doLookAhead, err := d.rule(c.beforeBits)
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
if doLookAhead {
|
||||
c.beforeBits &= d.keep
|
||||
c.beforeBits |= d.set
|
||||
// We may still have a lookahead rule which we will require to
|
||||
// complete (by checking termBits == 0) before setting the new
|
||||
// bits.
|
||||
if c.termBits != 0 && (!c.checkLookahead() || c.termBits == 0) {
|
||||
return n, err
|
||||
}
|
||||
c.termBits = d.term
|
||||
c.acceptBits = d.accept
|
||||
n += sz
|
||||
continue
|
||||
}
|
||||
}
|
||||
c.beforeBits &= d.keep
|
||||
c.beforeBits |= d.set
|
||||
if c.termBits != 0 && !c.checkLookahead() {
|
||||
return n, errContext
|
||||
}
|
||||
n += sz
|
||||
}
|
||||
if m := c.beforeBits >> finalShift; c.beforeBits&m != m || c.termBits != 0 {
|
||||
err = errContext
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (c *checker) checkLookahead() bool {
|
||||
switch {
|
||||
case c.beforeBits&c.termBits != 0:
|
||||
c.termBits = 0
|
||||
c.acceptBits = 0
|
||||
case c.beforeBits&c.acceptBits != 0:
|
||||
default:
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// TODO: we may get rid of this transform if transform.Chain understands
|
||||
// something like a Spanner interface.
|
||||
func (c checker) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
short := false
|
||||
if len(dst) < len(src) {
|
||||
src = src[:len(dst)]
|
||||
atEOF = false
|
||||
short = true
|
||||
}
|
||||
nSrc, err = c.span(src, atEOF)
|
||||
nDst = copy(dst, src[:nSrc])
|
||||
if short && (err == transform.ErrShortSrc || err == nil) {
|
||||
err = transform.ErrShortDst
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
78
vendor/golang.org/x/text/secure/precis/profiles.go
generated
vendored
78
vendor/golang.org/x/text/secure/precis/profiles.go
generated
vendored
|
@ -1,78 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package precis
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/runes"
|
||||
"golang.org/x/text/transform"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
var (
|
||||
// Implements the Nickname profile specified in RFC 7700.
|
||||
// The nickname profile is not idempotent and may need to be applied multiple
|
||||
// times before being used for comparisons.
|
||||
Nickname *Profile = nickname
|
||||
|
||||
// Implements the UsernameCaseMapped profile specified in RFC 7613.
|
||||
UsernameCaseMapped *Profile = usernameCaseMap
|
||||
|
||||
// Implements the UsernameCasePreserved profile specified in RFC 7613.
|
||||
UsernameCasePreserved *Profile = usernameNoCaseMap
|
||||
|
||||
// Implements the OpaqueString profile defined in RFC 7613 for passwords and other secure labels.
|
||||
OpaqueString *Profile = opaquestring
|
||||
)
|
||||
|
||||
var (
|
||||
nickname = &Profile{
|
||||
options: getOpts(
|
||||
AdditionalMapping(func() transform.Transformer {
|
||||
return &nickAdditionalMapping{}
|
||||
}),
|
||||
IgnoreCase,
|
||||
Norm(norm.NFKC),
|
||||
DisallowEmpty,
|
||||
),
|
||||
class: freeform,
|
||||
}
|
||||
usernameCaseMap = &Profile{
|
||||
options: getOpts(
|
||||
FoldWidth,
|
||||
LowerCase(),
|
||||
Norm(norm.NFC),
|
||||
BidiRule,
|
||||
),
|
||||
class: identifier,
|
||||
}
|
||||
usernameNoCaseMap = &Profile{
|
||||
options: getOpts(
|
||||
FoldWidth,
|
||||
Norm(norm.NFC),
|
||||
BidiRule,
|
||||
),
|
||||
class: identifier,
|
||||
}
|
||||
opaquestring = &Profile{
|
||||
options: getOpts(
|
||||
AdditionalMapping(func() transform.Transformer {
|
||||
return mapSpaces
|
||||
}),
|
||||
Norm(norm.NFC),
|
||||
DisallowEmpty,
|
||||
),
|
||||
class: freeform,
|
||||
}
|
||||
)
|
||||
|
||||
// mapSpaces is a shared value of a runes.Map transformer.
|
||||
var mapSpaces transform.Transformer = runes.Map(func(r rune) rune {
|
||||
if unicode.Is(unicode.Zs, r) {
|
||||
return ' '
|
||||
}
|
||||
return r
|
||||
})
|
3788
vendor/golang.org/x/text/secure/precis/tables.go
generated
vendored
3788
vendor/golang.org/x/text/secure/precis/tables.go
generated
vendored
File diff suppressed because it is too large
Load diff
32
vendor/golang.org/x/text/secure/precis/transformer.go
generated
vendored
32
vendor/golang.org/x/text/secure/precis/transformer.go
generated
vendored
|
@ -1,32 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package precis
|
||||
|
||||
import "golang.org/x/text/transform"
|
||||
|
||||
// Transformer implements the transform.Transformer interface.
|
||||
type Transformer struct {
|
||||
t transform.Transformer
|
||||
}
|
||||
|
||||
// Reset implements the transform.Transformer interface.
|
||||
func (t Transformer) Reset() { t.t.Reset() }
|
||||
|
||||
// Transform implements the transform.Transformer interface.
|
||||
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
return t.t.Transform(dst, src, atEOF)
|
||||
}
|
||||
|
||||
// Bytes returns a new byte slice with the result of applying t to b.
|
||||
func (t Transformer) Bytes(b []byte) []byte {
|
||||
b, _, _ = transform.Bytes(t, b)
|
||||
return b
|
||||
}
|
||||
|
||||
// String returns a string with the result of applying t to s.
|
||||
func (t Transformer) String(s string) string {
|
||||
s, _, _ = transform.String(t, s)
|
||||
return s
|
||||
}
|
64
vendor/golang.org/x/text/secure/precis/trieval.go
generated
vendored
64
vendor/golang.org/x/text/secure/precis/trieval.go
generated
vendored
|
@ -1,64 +0,0 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package precis
|
||||
|
||||
// entry is the entry of a trie table
|
||||
// 7..6 property (unassigned, disallowed, maybe, valid)
|
||||
// 5..0 category
|
||||
type entry uint8
|
||||
|
||||
const (
|
||||
propShift = 6
|
||||
propMask = 0xc0
|
||||
catMask = 0x3f
|
||||
)
|
||||
|
||||
// property returns the bits of e selected by propMask as a property value.
func (e entry) property() property {
	return property(e & propMask)
}
|
||||
// category returns the bits of e selected by catMask as a category value.
func (e entry) category() category {
	return category(e & catMask)
}
|
||||
|
||||
type property uint8
|
||||
|
||||
// The order of these constants matter. A Profile may consider runes to be
|
||||
// allowed either from pValid or idDisOrFreePVal.
|
||||
const (
|
||||
unassigned property = iota << propShift
|
||||
disallowed
|
||||
idDisOrFreePVal // disallowed for Identifier, pValid for FreeForm
|
||||
pValid
|
||||
)
|
||||
|
||||
// compute permutations of all properties and specialCategories.
type category uint8

// The category values are consecutive, starting at zero with other.
// numCategories is the number of categories defined.
const (
	other category = iota

	// Special rune types
	joiningL
	joiningD
	joiningT
	joiningR
	viramaModifier
	viramaJoinT // Virama + JoiningT
	latinSmallL // U+006c
	greek
	greekJoinT // Greek + JoiningT
	hebrew
	hebrewJoinT // Hebrew + JoiningT
	japanese    // hiragana, katakana, han

	// Special rune types associated with contextual rules defined in
	// https://tools.ietf.org/html/rfc5892#appendix-A.
	// ContextJ (join controls)
	zeroWidthNonJoiner // rule 1
	zeroWidthJoiner    // rule 2
	// ContextO (other contextual code points)
	middleDot                // rule 3
	greekLowerNumeralSign    // rule 4
	hebrewPreceding          // rule 5 and 6
	katakanaMiddleDot        // rule 7
	arabicIndicDigit         // rule 8
	extendedArabicIndicDigit // rule 9

	numCategories
)
|
115
vendor/golang.org/x/text/width/gen.go
generated
vendored
115
vendor/golang.org/x/text/width/gen.go
generated
vendored
|
@ -1,115 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// This program generates the trie for width operations. The generated table
|
||||
// includes width category information as well as the normalization mappings.
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"math"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/triegen"
|
||||
)
|
||||
|
||||
// See gen_common.go for flags.
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
genTables()
|
||||
genTests()
|
||||
gen.Repackage("gen_trieval.go", "trieval.go", "width")
|
||||
gen.Repackage("gen_common.go", "common_test.go", "width")
|
||||
}
|
||||
|
||||
func genTables() {
|
||||
t := triegen.NewTrie("width")
|
||||
// fold and inverse mappings. See mapComment for a description of the format
|
||||
// of each entry. Add dummy value to make an index of 0 mean no mapping.
|
||||
inverse := [][4]byte{{}}
|
||||
mapping := map[[4]byte]int{[4]byte{}: 0}
|
||||
|
||||
getWidthData(func(r rune, tag elem, alt rune) {
|
||||
idx := 0
|
||||
if alt != 0 {
|
||||
var buf [4]byte
|
||||
buf[0] = byte(utf8.EncodeRune(buf[1:], alt))
|
||||
s := string(r)
|
||||
buf[buf[0]] ^= s[len(s)-1]
|
||||
var ok bool
|
||||
if idx, ok = mapping[buf]; !ok {
|
||||
idx = len(mapping)
|
||||
if idx > math.MaxUint8 {
|
||||
log.Fatalf("Index %d does not fit in a byte.", idx)
|
||||
}
|
||||
mapping[buf] = idx
|
||||
inverse = append(inverse, buf)
|
||||
}
|
||||
}
|
||||
t.Insert(r, uint64(tag|elem(idx)))
|
||||
})
|
||||
|
||||
w := &bytes.Buffer{}
|
||||
gen.WriteUnicodeVersion(w)
|
||||
|
||||
sz, err := t.Gen(w)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
sz += writeMappings(w, inverse)
|
||||
|
||||
fmt.Fprintf(w, "// Total table size %d bytes (%dKiB)\n", sz, sz/1024)
|
||||
|
||||
gen.WriteGoFile(*outputFile, "width", w.Bytes())
|
||||
}
|
||||
|
||||
const inverseDataComment = `
|
||||
// inverseData contains 4-byte entries of the following format:
|
||||
// <length> <modified UTF-8-encoded rune> <0 padding>
|
||||
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
|
||||
// UTF-8 encoding of the original rune. Mappings often have the following
|
||||
// pattern:
|
||||
// A -> A (U+FF21 -> U+0041)
|
||||
// B -> B (U+FF22 -> U+0042)
|
||||
// ...
|
||||
// By xor-ing the last byte the same entry can be shared by many mappings. This
|
||||
// reduces the total number of distinct entries by about two thirds.
|
||||
// The resulting entry for the aforementioned mappings is
|
||||
// { 0x01, 0xE0, 0x00, 0x00 }
|
||||
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
|
||||
// E0 ^ A1 = 41.
|
||||
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
|
||||
// E0 ^ A2 = 42.
|
||||
// Note that because of the xor-ing, the byte sequence stored in the entry is
|
||||
// not valid UTF-8.`
|
||||
|
||||
func writeMappings(w io.Writer, data [][4]byte) int {
|
||||
fmt.Fprintln(w, inverseDataComment)
|
||||
fmt.Fprintf(w, "var inverseData = [%d][4]byte{\n", len(data))
|
||||
for _, x := range data {
|
||||
fmt.Fprintf(w, "{ 0x%02x, 0x%02x, 0x%02x, 0x%02x },\n", x[0], x[1], x[2], x[3])
|
||||
}
|
||||
fmt.Fprintln(w, "}")
|
||||
return len(data) * 4
|
||||
}
|
||||
|
||||
func genTests() {
|
||||
w := &bytes.Buffer{}
|
||||
fmt.Fprintf(w, "\nvar mapRunes = map[rune]struct{r rune; e elem}{\n")
|
||||
getWidthData(func(r rune, tag elem, alt rune) {
|
||||
if alt != 0 {
|
||||
fmt.Fprintf(w, "\t0x%X: {0x%X, 0x%X},\n", r, alt, tag)
|
||||
}
|
||||
})
|
||||
fmt.Fprintln(w, "}")
|
||||
gen.WriteGoFile("runes_test.go", "width", w.Bytes())
|
||||
}
|
96
vendor/golang.org/x/text/width/gen_common.go
generated
vendored
96
vendor/golang.org/x/text/width/gen_common.go
generated
vendored
|
@ -1,96 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This code is shared between the main code generator and the test code.
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
var (
|
||||
outputFile = flag.String("out", "tables.go", "output file")
|
||||
)
|
||||
|
||||
var typeMap = map[string]elem{
|
||||
"A": tagAmbiguous,
|
||||
"N": tagNeutral,
|
||||
"Na": tagNarrow,
|
||||
"W": tagWide,
|
||||
"F": tagFullwidth,
|
||||
"H": tagHalfwidth,
|
||||
}
|
||||
|
||||
// getWidthData calls f for every entry for which it is defined.
|
||||
//
|
||||
// f may be called multiple times for the same rune. The last call to f is the
|
||||
// correct value. f is not called for all runes. The default tag type is
|
||||
// Neutral.
|
||||
func getWidthData(f func(r rune, tag elem, alt rune)) {
|
||||
// Set the default values for Unified Ideographs. In line with Annex 11,
|
||||
// we encode full ranges instead of the defined runes in Unified_Ideograph.
|
||||
for _, b := range []struct{ lo, hi rune }{
|
||||
{0x4E00, 0x9FFF}, // the CJK Unified Ideographs block,
|
||||
{0x3400, 0x4DBF}, // the CJK Unified Ideographs Externsion A block,
|
||||
{0xF900, 0xFAFF}, // the CJK Compatibility Ideographs block,
|
||||
{0x20000, 0x2FFFF}, // the Supplementary Ideographic Plane,
|
||||
{0x30000, 0x3FFFF}, // the Tertiary Ideographic Plane,
|
||||
} {
|
||||
for r := b.lo; r <= b.hi; r++ {
|
||||
f(r, tagWide, 0)
|
||||
}
|
||||
}
|
||||
|
||||
inverse := map[rune]rune{}
|
||||
maps := map[string]bool{
|
||||
"<wide>": true,
|
||||
"<narrow>": true,
|
||||
}
|
||||
|
||||
// We cannot reuse package norm's decomposition, as we need an unexpanded
|
||||
// decomposition. We make use of the opportunity to verify that the
|
||||
// decomposition type is as expected.
|
||||
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
|
||||
r := p.Rune(0)
|
||||
s := strings.SplitN(p.String(ucd.DecompMapping), " ", 2)
|
||||
if !maps[s[0]] {
|
||||
return
|
||||
}
|
||||
x, err := strconv.ParseUint(s[1], 16, 32)
|
||||
if err != nil {
|
||||
log.Fatalf("Error parsing rune %q", s[1])
|
||||
}
|
||||
if inverse[r] != 0 || inverse[rune(x)] != 0 {
|
||||
log.Fatalf("Circular dependency in mapping between %U and %U", r, x)
|
||||
}
|
||||
inverse[r] = rune(x)
|
||||
inverse[rune(x)] = r
|
||||
})
|
||||
|
||||
// <rune range>;<type>
|
||||
ucd.Parse(gen.OpenUCDFile("EastAsianWidth.txt"), func(p *ucd.Parser) {
|
||||
tag, ok := typeMap[p.String(1)]
|
||||
if !ok {
|
||||
log.Fatalf("Unknown width type %q", p.String(1))
|
||||
}
|
||||
r := p.Rune(0)
|
||||
alt, ok := inverse[r]
|
||||
if tag == tagFullwidth || tag == tagHalfwidth && r != wonSign {
|
||||
tag |= tagNeedsFold
|
||||
if !ok {
|
||||
log.Fatalf("Narrow or wide rune %U has no decomposition", r)
|
||||
}
|
||||
}
|
||||
f(r, tag, alt)
|
||||
})
|
||||
}
|
34
vendor/golang.org/x/text/width/gen_trieval.go
generated
vendored
34
vendor/golang.org/x/text/width/gen_trieval.go
generated
vendored
|
@ -1,34 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// elem is an entry of the width trie. The high byte is used to encode the type
|
||||
// of the rune. The low byte is used to store the index to a mapping entry in
|
||||
// the inverseData array.
|
||||
type elem uint16
|
||||
|
||||
const (
|
||||
tagNeutral elem = iota << typeShift
|
||||
tagAmbiguous
|
||||
tagWide
|
||||
tagNarrow
|
||||
tagFullwidth
|
||||
tagHalfwidth
|
||||
)
|
||||
|
||||
const (
|
||||
numTypeBits = 3
|
||||
typeShift = 16 - numTypeBits
|
||||
|
||||
// tagNeedsFold is true for all fullwidth and halfwidth runes except for
|
||||
// the Won sign U+20A9.
|
||||
tagNeedsFold = 0x1000
|
||||
|
||||
// The Korean Won sign is halfwidth, but SHOULD NOT be mapped to a wide
|
||||
// variant.
|
||||
wonSign rune = 0x20A9
|
||||
)
|
16
vendor/golang.org/x/text/width/kind_string.go
generated
vendored
16
vendor/golang.org/x/text/width/kind_string.go
generated
vendored
|
@ -1,16 +0,0 @@
|
|||
// Code generated by "stringer -type=Kind"; DO NOT EDIT
|
||||
|
||||
package width
|
||||
|
||||
import "fmt"
|
||||
|
||||
const _Kind_name = "NeutralEastAsianAmbiguousEastAsianWideEastAsianNarrowEastAsianFullwidthEastAsianHalfwidth"
|
||||
|
||||
var _Kind_index = [...]uint8{0, 7, 25, 38, 53, 71, 89}
|
||||
|
||||
func (i Kind) String() string {
|
||||
if i < 0 || i >= Kind(len(_Kind_index)-1) {
|
||||
return fmt.Sprintf("Kind(%d)", i)
|
||||
}
|
||||
return _Kind_name[_Kind_index[i]:_Kind_index[i+1]]
|
||||
}
|
1284
vendor/golang.org/x/text/width/tables.go
generated
vendored
1284
vendor/golang.org/x/text/width/tables.go
generated
vendored
File diff suppressed because it is too large
Load diff
239
vendor/golang.org/x/text/width/transform.go
generated
vendored
239
vendor/golang.org/x/text/width/transform.go
generated
vendored
|
@ -1,239 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package width
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
type foldTransform struct {
|
||||
transform.NopResetter
|
||||
}
|
||||
|
||||
func (foldTransform) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
for n < len(src) {
|
||||
if src[n] < utf8.RuneSelf {
|
||||
// ASCII fast path.
|
||||
for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
|
||||
}
|
||||
continue
|
||||
}
|
||||
v, size := trie.lookup(src[n:])
|
||||
if size == 0 { // incomplete UTF-8 encoding
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
} else {
|
||||
n = len(src)
|
||||
}
|
||||
break
|
||||
}
|
||||
if elem(v)&tagNeedsFold != 0 {
|
||||
err = transform.ErrEndOfSpan
|
||||
break
|
||||
}
|
||||
n += size
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (foldTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
for nSrc < len(src) {
|
||||
if src[nSrc] < utf8.RuneSelf {
|
||||
// ASCII fast path.
|
||||
start, end := nSrc, len(src)
|
||||
if d := len(dst) - nDst; d < end-start {
|
||||
end = nSrc + d
|
||||
}
|
||||
for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
|
||||
}
|
||||
n := copy(dst[nDst:], src[start:nSrc])
|
||||
if nDst += n; nDst == len(dst) {
|
||||
nSrc = start + n
|
||||
if nSrc == len(src) {
|
||||
return nDst, nSrc, nil
|
||||
}
|
||||
if src[nSrc] < utf8.RuneSelf {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
v, size := trie.lookup(src[nSrc:])
|
||||
if size == 0 { // incomplete UTF-8 encoding
|
||||
if !atEOF {
|
||||
return nDst, nSrc, transform.ErrShortSrc
|
||||
}
|
||||
size = 1 // gobble 1 byte
|
||||
}
|
||||
if elem(v)&tagNeedsFold == 0 {
|
||||
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
nDst += size
|
||||
} else {
|
||||
data := inverseData[byte(v)]
|
||||
if len(dst)-nDst < int(data[0]) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
i := 1
|
||||
for end := int(data[0]); i < end; i++ {
|
||||
dst[nDst] = data[i]
|
||||
nDst++
|
||||
}
|
||||
dst[nDst] = data[i] ^ src[nSrc+size-1]
|
||||
nDst++
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
return nDst, nSrc, nil
|
||||
}
|
||||
|
||||
type narrowTransform struct {
|
||||
transform.NopResetter
|
||||
}
|
||||
|
||||
func (narrowTransform) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
for n < len(src) {
|
||||
if src[n] < utf8.RuneSelf {
|
||||
// ASCII fast path.
|
||||
for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
|
||||
}
|
||||
continue
|
||||
}
|
||||
v, size := trie.lookup(src[n:])
|
||||
if size == 0 { // incomplete UTF-8 encoding
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
} else {
|
||||
n = len(src)
|
||||
}
|
||||
break
|
||||
}
|
||||
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
|
||||
} else {
|
||||
err = transform.ErrEndOfSpan
|
||||
break
|
||||
}
|
||||
n += size
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (narrowTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
for nSrc < len(src) {
|
||||
if src[nSrc] < utf8.RuneSelf {
|
||||
// ASCII fast path.
|
||||
start, end := nSrc, len(src)
|
||||
if d := len(dst) - nDst; d < end-start {
|
||||
end = nSrc + d
|
||||
}
|
||||
for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
|
||||
}
|
||||
n := copy(dst[nDst:], src[start:nSrc])
|
||||
if nDst += n; nDst == len(dst) {
|
||||
nSrc = start + n
|
||||
if nSrc == len(src) {
|
||||
return nDst, nSrc, nil
|
||||
}
|
||||
if src[nSrc] < utf8.RuneSelf {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
v, size := trie.lookup(src[nSrc:])
|
||||
if size == 0 { // incomplete UTF-8 encoding
|
||||
if !atEOF {
|
||||
return nDst, nSrc, transform.ErrShortSrc
|
||||
}
|
||||
size = 1 // gobble 1 byte
|
||||
}
|
||||
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
|
||||
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
nDst += size
|
||||
} else {
|
||||
data := inverseData[byte(v)]
|
||||
if len(dst)-nDst < int(data[0]) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
i := 1
|
||||
for end := int(data[0]); i < end; i++ {
|
||||
dst[nDst] = data[i]
|
||||
nDst++
|
||||
}
|
||||
dst[nDst] = data[i] ^ src[nSrc+size-1]
|
||||
nDst++
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
return nDst, nSrc, nil
|
||||
}
|
||||
|
||||
type wideTransform struct {
|
||||
transform.NopResetter
|
||||
}
|
||||
|
||||
func (wideTransform) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
for n < len(src) {
|
||||
// TODO: Consider ASCII fast path. Special-casing ASCII handling can
|
||||
// reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
|
||||
// not enough to warrant the extra code and complexity.
|
||||
v, size := trie.lookup(src[n:])
|
||||
if size == 0 { // incomplete UTF-8 encoding
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
} else {
|
||||
n = len(src)
|
||||
}
|
||||
break
|
||||
}
|
||||
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
|
||||
} else {
|
||||
err = transform.ErrEndOfSpan
|
||||
break
|
||||
}
|
||||
n += size
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (wideTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
for nSrc < len(src) {
|
||||
// TODO: Consider ASCII fast path. Special-casing ASCII handling can
|
||||
// reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
|
||||
// not enough to warrant the extra code and complexity.
|
||||
v, size := trie.lookup(src[nSrc:])
|
||||
if size == 0 { // incomplete UTF-8 encoding
|
||||
if !atEOF {
|
||||
return nDst, nSrc, transform.ErrShortSrc
|
||||
}
|
||||
size = 1 // gobble 1 byte
|
||||
}
|
||||
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
|
||||
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
nDst += size
|
||||
} else {
|
||||
data := inverseData[byte(v)]
|
||||
if len(dst)-nDst < int(data[0]) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
i := 1
|
||||
for end := int(data[0]); i < end; i++ {
|
||||
dst[nDst] = data[i]
|
||||
nDst++
|
||||
}
|
||||
dst[nDst] = data[i] ^ src[nSrc+size-1]
|
||||
nDst++
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
return nDst, nSrc, nil
|
||||
}
|
30
vendor/golang.org/x/text/width/trieval.go
generated
vendored
30
vendor/golang.org/x/text/width/trieval.go
generated
vendored
|
@ -1,30 +0,0 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package width
|
||||
|
||||
// elem is an entry of the width trie. The high byte is used to encode the type
|
||||
// of the rune. The low byte is used to store the index to a mapping entry in
|
||||
// the inverseData array.
|
||||
type elem uint16
|
||||
|
||||
const (
|
||||
tagNeutral elem = iota << typeShift
|
||||
tagAmbiguous
|
||||
tagWide
|
||||
tagNarrow
|
||||
tagFullwidth
|
||||
tagHalfwidth
|
||||
)
|
||||
|
||||
const (
|
||||
numTypeBits = 3
|
||||
typeShift = 16 - numTypeBits
|
||||
|
||||
// tagNeedsFold is true for all fullwidth and halfwidth runes except for
|
||||
// the Won sign U+20A9.
|
||||
tagNeedsFold = 0x1000
|
||||
|
||||
// The Korean Won sign is halfwidth, but SHOULD NOT be mapped to a wide
|
||||
// variant.
|
||||
wonSign rune = 0x20A9
|
||||
)
|
206
vendor/golang.org/x/text/width/width.go
generated
vendored
206
vendor/golang.org/x/text/width/width.go
generated
vendored
|
@ -1,206 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate stringer -type=Kind
|
||||
//go:generate go run gen.go gen_common.go gen_trieval.go
|
||||
|
||||
// Package width provides functionality for handling different widths in text.
|
||||
//
|
||||
// Wide characters behave like ideographs; they tend to allow line breaks after
|
||||
// each character and remain upright in vertical text layout. Narrow characters
|
||||
// are kept together in words or runs that are rotated sideways in vertical text
|
||||
// layout.
|
||||
//
|
||||
// For more information, see http://unicode.org/reports/tr11/.
|
||||
package width // import "golang.org/x/text/width"
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// TODO
|
||||
// 1) Reduce table size by compressing blocks.
|
||||
// 2) API proposition for computing display length
|
||||
// (approximation, fixed pitch only).
|
||||
// 3) Implement display length.
|
||||
|
||||
// Kind indicates the type of width property as defined in http://unicode.org/reports/tr11/.
type Kind int

const (
	// Neutral characters do not occur in legacy East Asian character sets.
	Neutral Kind = iota

	// EastAsianAmbiguous characters can be sometimes wide and sometimes
	// narrow and require additional information not contained in the character
	// code to further resolve their width.
	EastAsianAmbiguous

	// EastAsianWide characters are wide in their usual form. They occur only
	// in the context of East Asian typography. These runes may have explicit
	// halfwidth counterparts.
	EastAsianWide

	// EastAsianNarrow characters are narrow in their usual form. They often
	// have fullwidth counterparts.
	EastAsianNarrow

	// Note: there exist Narrow runes that do not have fullwidth or wide
	// counterparts, despite what the definition says (e.g. U+27E6).

	// EastAsianFullwidth characters have a compatibility decomposition of type
	// wide that maps to a narrow counterpart.
	EastAsianFullwidth

	// EastAsianHalfwidth characters have a compatibility decomposition of type
	// narrow that maps to a wide or ambiguous counterpart, plus U+20A9 ₩ WON
	// SIGN.
	EastAsianHalfwidth

	// Note: there exist runes that have a halfwidth counterpart but that are
	// classified as Ambiguous, rather than wide (e.g. U+2190).
)
|
||||
|
||||
// TODO: the generated tries need to return size 1 for invalid runes for the
|
||||
// width to be computed correctly (each byte should render width 1)
|
||||
|
||||
var trie = newWidthTrie(0)
|
||||
|
||||
// Lookup reports the Properties of the first rune in b and the number of bytes
|
||||
// of its UTF-8 encoding.
|
||||
func Lookup(b []byte) (p Properties, size int) {
|
||||
v, sz := trie.lookup(b)
|
||||
return Properties{elem(v), b[sz-1]}, sz
|
||||
}
|
||||
|
||||
// LookupString reports the Properties of the first rune in s and the number of
|
||||
// bytes of its UTF-8 encoding.
|
||||
func LookupString(s string) (p Properties, size int) {
|
||||
v, sz := trie.lookupString(s)
|
||||
return Properties{elem(v), s[sz-1]}, sz
|
||||
}
|
||||
|
||||
// LookupRune reports the Properties of rune r.
|
||||
func LookupRune(r rune) Properties {
|
||||
var buf [4]byte
|
||||
n := utf8.EncodeRune(buf[:], r)
|
||||
v, _ := trie.lookup(buf[:n])
|
||||
last := byte(r)
|
||||
if r >= utf8.RuneSelf {
|
||||
last = 0x80 + byte(r&0x3f)
|
||||
}
|
||||
return Properties{elem(v), last}
|
||||
}
|
||||
|
||||
// Properties provides access to width properties of a rune.
|
||||
type Properties struct {
|
||||
elem elem
|
||||
last byte
|
||||
}
|
||||
|
||||
func (e elem) kind() Kind {
|
||||
return Kind(e >> typeShift)
|
||||
}
|
||||
|
||||
// Kind returns the Kind of a rune as defined in Unicode TR #11.
|
||||
// See http://unicode.org/reports/tr11/ for more details.
|
||||
func (p Properties) Kind() Kind {
|
||||
return p.elem.kind()
|
||||
}
|
||||
|
||||
// Folded returns the folded variant of a rune or 0 if the rune is canonical.
|
||||
func (p Properties) Folded() rune {
|
||||
if p.elem&tagNeedsFold != 0 {
|
||||
buf := inverseData[byte(p.elem)]
|
||||
buf[buf[0]] ^= p.last
|
||||
r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
|
||||
return r
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Narrow returns the narrow variant of a rune or 0 if the rune is already
|
||||
// narrow or doesn't have a narrow variant.
|
||||
func (p Properties) Narrow() rune {
|
||||
if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous) {
|
||||
buf := inverseData[byte(p.elem)]
|
||||
buf[buf[0]] ^= p.last
|
||||
r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
|
||||
return r
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Wide returns the wide variant of a rune or 0 if the rune is already
|
||||
// wide or doesn't have a wide variant.
|
||||
func (p Properties) Wide() rune {
|
||||
if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianHalfwidth || k == EastAsianNarrow) {
|
||||
buf := inverseData[byte(p.elem)]
|
||||
buf[buf[0]] ^= p.last
|
||||
r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
|
||||
return r
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// TODO for Properties:
|
||||
// - Add Fullwidth/Halfwidth or Inverted methods for computing variants
|
||||
// mapping.
|
||||
// - Add width information (including information on non-spacing runes).
|
||||
|
||||
// Transformer implements the transform.Transformer interface.
|
||||
type Transformer struct {
|
||||
t transform.SpanningTransformer
|
||||
}
|
||||
|
||||
// Reset implements the transform.Transformer interface.
|
||||
func (t Transformer) Reset() { t.t.Reset() }
|
||||
|
||||
// Transform implements the transform.Transformer interface.
|
||||
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
return t.t.Transform(dst, src, atEOF)
|
||||
}
|
||||
|
||||
// Span implements the transform.SpanningTransformer interface.
|
||||
func (t Transformer) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
return t.t.Span(src, atEOF)
|
||||
}
|
||||
|
||||
// Bytes returns a new byte slice with the result of applying t to b.
|
||||
func (t Transformer) Bytes(b []byte) []byte {
|
||||
b, _, _ = transform.Bytes(t, b)
|
||||
return b
|
||||
}
|
||||
|
||||
// String returns a string with the result of applying t to s.
|
||||
func (t Transformer) String(s string) string {
|
||||
s, _, _ = transform.String(t, s)
|
||||
return s
|
||||
}
|
||||
|
||||
var (
|
||||
// Fold is a transform that maps all runes to their canonical width.
|
||||
//
|
||||
// Note that the NFKC and NFKD transforms in golang.org/x/text/unicode/norm
|
||||
// provide a more generic folding mechanism.
|
||||
Fold Transformer = Transformer{foldTransform{}}
|
||||
|
||||
// Widen is a transform that maps runes to their wide variant, if
|
||||
// available.
|
||||
Widen Transformer = Transformer{wideTransform{}}
|
||||
|
||||
// Narrow is a transform that maps runes to their narrow variant, if
|
||||
// available.
|
||||
Narrow Transformer = Transformer{narrowTransform{}}
|
||||
)
|
||||
|
||||
// TODO: Consider the following options:
|
||||
// - Treat Ambiguous runes that have a halfwidth counterpart as wide, or some
|
||||
// generalized variant of this.
|
||||
// - Consider a wide Won character to be the default width (or some generalized
|
||||
// variant of this).
|
||||
// - Filter the set of characters that gets converted (the preferred approach is
|
||||
// to allow applying filters to transforms).
|
Loading…
Add table
Add a link
Reference in a new issue