1
0
Fork 0

Update dependencies

This commit is contained in:
Ed Robinson 2017-04-07 10:09:57 +01:00
parent 51e4dcbb1f
commit 65284441fa
No known key found for this signature in database
GPG key ID: EC501FCA6421CCF0
98 changed files with 25265 additions and 1992 deletions

162
vendor/golang.org/x/text/cases/cases.go generated vendored Normal file
View file

@ -0,0 +1,162 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_trieval.go
// Package cases provides general and language-specific case mappers.
package cases // import "golang.org/x/text/cases"
import (
"golang.org/x/text/language"
"golang.org/x/text/transform"
)
// References:
// - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18.
// - http://www.unicode.org/reports/tr29/
// - http://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
// - http://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
// - http://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
// - http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
// - http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
// - http://userguide.icu-project.org/transforms/casemappings
// TODO:
// - Case folding
// - Wide and Narrow?
// - Segmenter option for title casing.
// - ASCII fast paths
// - Encode Soft-Dotted property within trie somehow.
// A Caser transforms given input to a certain case. It implements
// transform.Transformer.
//
// A Caser may be stateful and should therefore not be shared between
// goroutines.
type Caser struct {
t transform.SpanningTransformer
}
// Bytes returns a new byte slice with the result of converting b to the case
// form implemented by c.
func (c Caser) Bytes(b []byte) []byte {
b, _, _ = transform.Bytes(c.t, b)
return b
}
// String returns a string with the result of transforming s to the case form
// implemented by c.
func (c Caser) String(s string) string {
s, _, _ = transform.String(c.t, s)
return s
}
// Reset resets the Caser to be reused for new input after a previous call to
// Transform.
func (c Caser) Reset() { c.t.Reset() }
// Transform implements the transform.Transformer interface and transforms the
// given input to the case form implemented by c.
func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return c.t.Transform(dst, src, atEOF)
}
// Span implements the transform.SpanningTransformer interface.
func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
return c.t.Span(src, atEOF)
}
// Upper returns a Caser for language-specific uppercasing.
func Upper(t language.Tag, opts ...Option) Caser {
return Caser{makeUpper(t, getOpts(opts...))}
}
// Lower returns a Caser for language-specific lowercasing.
func Lower(t language.Tag, opts ...Option) Caser {
return Caser{makeLower(t, getOpts(opts...))}
}
// Title returns a Caser for language-specific title casing. It uses an
// approximation of the default Unicode Word Break algorithm.
func Title(t language.Tag, opts ...Option) Caser {
return Caser{makeTitle(t, getOpts(opts...))}
}
// Fold returns a Caser that implements Unicode case folding. The returned Caser
// is stateless and safe to use concurrently by multiple goroutines.
//
// Case folding does not normalize the input and may not preserve a normal form.
// Use the collate or search package for more convenient and linguistically
// sound comparisons. Use unicode/precis for string comparisons where security
// aspects are a concern.
func Fold(opts ...Option) Caser {
return Caser{makeFold(getOpts(opts...))}
}
// An Option is used to modify the behavior of a Caser.
type Option func(o options) options
// TODO: consider these options to take a boolean as well, like FinalSigma.
// The advantage of using this approach is that other providers of a lower-case
// algorithm could set different defaults by prefixing a user-provided slice
// of options with their own. This is handy, for instance, for the precis
// package which would override the default to not handle the Greek final sigma.
var (
// NoLower disables the lowercasing of non-leading letters for a title
// caser.
NoLower Option = noLower
// Compact omits mappings in case folding for characters that would grow the
// input. (Unimplemented.)
Compact Option = compact
)
// TODO: option to preserve a normal form, if applicable?
type options struct {
noLower bool
simple bool
// TODO: segmenter, max ignorable, alternative versions, etc.
ignoreFinalSigma bool
}
func getOpts(o ...Option) (res options) {
for _, f := range o {
res = f(res)
}
return
}
func noLower(o options) options {
o.noLower = true
return o
}
func compact(o options) options {
o.simple = true
return o
}
// HandleFinalSigma specifies whether the special handling of Greek final sigma
// should be enabled. Unicode prescribes handling the Greek final sigma for all
// locales, but standards like IDNA and PRECIS override this default.
func HandleFinalSigma(enable bool) Option {
if enable {
return handleFinalSigma
}
return ignoreFinalSigma
}
func ignoreFinalSigma(o options) options {
o.ignoreFinalSigma = true
return o
}
func handleFinalSigma(o options) options {
o.ignoreFinalSigma = false
return o
}

376
vendor/golang.org/x/text/cases/context.go generated vendored Normal file
View file

@ -0,0 +1,376 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
import "golang.org/x/text/transform"
// A context is used for iterating over source bytes, fetching case info and
// writing to a destination buffer.
//
// Casing operations may need more than one rune of context to decide how a rune
// should be cased. Casing implementations should call checkpoint on context
// whenever it is known to be safe to return the runes processed so far.
//
// It is recommended for implementations to not allow for more than 30 case
// ignorables as lookahead (analogous to the limit in norm) and to use state if
// unbounded lookahead is needed for cased runes.
type context struct {
dst, src []byte
atEOF bool
pDst int // pDst points past the last written rune in dst.
pSrc int // pSrc points to the start of the currently scanned rune.
// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
nDst, nSrc int
err error
sz int // size of current rune
info info // case information of currently scanned rune
// State preserved across calls to Transform.
isMidWord bool // false if next cased letter needs to be title-cased.
}
func (c *context) Reset() {
c.isMidWord = false
}
// ret returns the return values for the Transform method. It checks whether
// there were insufficient bytes in src to complete and introduces an error
// accordingly, if necessary.
func (c *context) ret() (nDst, nSrc int, err error) {
if c.err != nil || c.nSrc == len(c.src) {
return c.nDst, c.nSrc, c.err
}
// This point is only reached by mappers if there was no short destination
// buffer. This means that the source buffer was exhausted and that c.sz was
// set to 0 by next.
if c.atEOF && c.pSrc == len(c.src) {
return c.pDst, c.pSrc, nil
}
return c.nDst, c.nSrc, transform.ErrShortSrc
}
// retSpan returns the return values for the Span method. It checks whether
// there were insufficient bytes in src to complete and introduces an error
// accordingly, if necessary.
func (c *context) retSpan() (n int, err error) {
_, nSrc, err := c.ret()
return nSrc, err
}
// checkpoint sets the return value buffer points for Transform to the current
// positions.
func (c *context) checkpoint() {
if c.err == nil {
c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
}
}
// unreadRune causes the last rune read by next to be reread on the next
// invocation of next. Only one unreadRune may be called after a call to next.
func (c *context) unreadRune() {
c.sz = 0
}
func (c *context) next() bool {
c.pSrc += c.sz
if c.pSrc == len(c.src) || c.err != nil {
c.info, c.sz = 0, 0
return false
}
v, sz := trie.lookup(c.src[c.pSrc:])
c.info, c.sz = info(v), sz
if c.sz == 0 {
if c.atEOF {
// A zero size means we have an incomplete rune. If we are atEOF,
// this means it is an illegal rune, which we will consume one
// byte at a time.
c.sz = 1
} else {
c.err = transform.ErrShortSrc
return false
}
}
return true
}
// writeBytes adds bytes to dst.
func (c *context) writeBytes(b []byte) bool {
if len(c.dst)-c.pDst < len(b) {
c.err = transform.ErrShortDst
return false
}
// This loop is faster than using copy.
for _, ch := range b {
c.dst[c.pDst] = ch
c.pDst++
}
return true
}
// writeString writes the given string to dst.
func (c *context) writeString(s string) bool {
if len(c.dst)-c.pDst < len(s) {
c.err = transform.ErrShortDst
return false
}
// This loop is faster than using copy.
for i := 0; i < len(s); i++ {
c.dst[c.pDst] = s[i]
c.pDst++
}
return true
}
// copy writes the current rune to dst.
func (c *context) copy() bool {
return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
}
// copyXOR copies the current rune to dst and modifies it by applying the XOR
// pattern of the case info. It is the responsibility of the caller to ensure
// that this is a rune with a XOR pattern defined.
func (c *context) copyXOR() bool {
if !c.copy() {
return false
}
if c.info&xorIndexBit == 0 {
// Fast path for 6-bit XOR pattern, which covers most cases.
c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
} else {
// Interpret XOR bits as an index.
// TODO: test performance for unrolling this loop. Verify that we have
// at least two bytes and at most three.
idx := c.info >> xorShift
for p := c.pDst - 1; ; p-- {
c.dst[p] ^= xorData[idx]
idx--
if xorData[idx] == 0 {
break
}
}
}
return true
}
// hasPrefix returns true if src[pSrc:] starts with the given string.
func (c *context) hasPrefix(s string) bool {
b := c.src[c.pSrc:]
if len(b) < len(s) {
return false
}
for i, c := range b[:len(s)] {
if c != s[i] {
return false
}
}
return true
}
// caseType returns an info with only the case bits, normalized to either
// cLower, cUpper, cTitle or cUncased.
func (c *context) caseType() info {
cm := c.info & 0x7
if cm < 4 {
return cm
}
if cm >= cXORCase {
// xor the last bit of the rune with the case type bits.
b := c.src[c.pSrc+c.sz-1]
return info(b&1) ^ cm&0x3
}
if cm == cIgnorableCased {
return cLower
}
return cUncased
}
// lower writes the lowercase version of the current rune to dst.
func lower(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cLower {
return c.copy()
}
if c.info&exceptionBit == 0 {
return c.copyXOR()
}
e := exceptions[c.info>>exceptionShift:]
offset := 2 + e[0]&lengthMask // size of header + fold string
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
return c.writeString(e[offset : offset+nLower])
}
return c.copy()
}
func isLower(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cLower {
return true
}
if c.info&exceptionBit == 0 {
c.err = transform.ErrEndOfSpan
return false
}
e := exceptions[c.info>>exceptionShift:]
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
// upper writes the uppercase version of the current rune to dst.
func upper(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cUpper {
return c.copy()
}
if c.info&exceptionBit == 0 {
return c.copyXOR()
}
e := exceptions[c.info>>exceptionShift:]
offset := 2 + e[0]&lengthMask // size of header + fold string
// Get length of first special case mapping.
n := (e[1] >> lengthBits) & lengthMask
if ct == cTitle {
// The first special case mapping is for lower. Set n to the second.
if n == noChange {
n = 0
}
n, e = e[1]&lengthMask, e[n:]
}
if n != noChange {
return c.writeString(e[offset : offset+n])
}
return c.copy()
}
// isUpper writes the isUppercase version of the current rune to dst.
func isUpper(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cUpper {
return true
}
if c.info&exceptionBit == 0 {
c.err = transform.ErrEndOfSpan
return false
}
e := exceptions[c.info>>exceptionShift:]
// Get length of first special case mapping.
n := (e[1] >> lengthBits) & lengthMask
if ct == cTitle {
n = e[1] & lengthMask
}
if n != noChange {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
// title writes the title case version of the current rune to dst.
func title(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cTitle {
return c.copy()
}
if c.info&exceptionBit == 0 {
if ct == cLower {
return c.copyXOR()
}
return c.copy()
}
// Get the exception data.
e := exceptions[c.info>>exceptionShift:]
offset := 2 + e[0]&lengthMask // size of header + fold string
nFirst := (e[1] >> lengthBits) & lengthMask
if nTitle := e[1] & lengthMask; nTitle != noChange {
if nFirst != noChange {
e = e[nFirst:]
}
return c.writeString(e[offset : offset+nTitle])
}
if ct == cLower && nFirst != noChange {
// Use the uppercase version instead.
return c.writeString(e[offset : offset+nFirst])
}
// Already in correct case.
return c.copy()
}
// isTitle reports whether the current rune is in title case.
func isTitle(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cTitle {
return true
}
if c.info&exceptionBit == 0 {
if ct == cLower {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
// Get the exception data.
e := exceptions[c.info>>exceptionShift:]
if nTitle := e[1] & lengthMask; nTitle != noChange {
c.err = transform.ErrEndOfSpan
return false
}
nFirst := (e[1] >> lengthBits) & lengthMask
if ct == cLower && nFirst != noChange {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
// foldFull writes the foldFull version of the current rune to dst.
func foldFull(c *context) bool {
if c.info&hasMappingMask == 0 {
return c.copy()
}
ct := c.caseType()
if c.info&exceptionBit == 0 {
if ct != cLower || c.info&inverseFoldBit != 0 {
return c.copyXOR()
}
return c.copy()
}
e := exceptions[c.info>>exceptionShift:]
n := e[0] & lengthMask
if n == 0 {
if ct == cLower {
return c.copy()
}
n = (e[1] >> lengthBits) & lengthMask
}
return c.writeString(e[2 : 2+n])
}
// isFoldFull reports whether the current run is mapped to foldFull
func isFoldFull(c *context) bool {
if c.info&hasMappingMask == 0 {
return true
}
ct := c.caseType()
if c.info&exceptionBit == 0 {
if ct != cLower || c.info&inverseFoldBit != 0 {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
e := exceptions[c.info>>exceptionShift:]
n := e[0] & lengthMask
if n == 0 && ct == cLower {
return true
}
c.err = transform.ErrEndOfSpan
return false
}

34
vendor/golang.org/x/text/cases/fold.go generated vendored Normal file
View file

@ -0,0 +1,34 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
import "golang.org/x/text/transform"
type caseFolder struct{ transform.NopResetter }
// caseFolder implements the Transformer interface for doing case folding.
func (t *caseFolder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for c.next() {
foldFull(&c)
c.checkpoint()
}
return c.ret()
}
func (t *caseFolder) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && isFoldFull(&c) {
c.checkpoint()
}
return c.retSpan()
}
func makeFold(o options) transform.SpanningTransformer {
// TODO: Special case folding, through option Language, Special/Turkic, or
// both.
// TODO: Implement Compact options.
return &caseFolder{}
}

839
vendor/golang.org/x/text/cases/gen.go generated vendored Normal file
View file

@ -0,0 +1,839 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// This program generates the trie for casing operations. The Unicode casing
// algorithm requires the lookup of various properties and mappings for each
// rune. The table generated by this generator combines several of the most
// frequently used of these into a single trie so that they can be accessed
// with a single lookup.
package main
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"log"
"reflect"
"strconv"
"strings"
"unicode"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/triegen"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/unicode/norm"
)
func main() {
gen.Init()
genTables()
genTablesTest()
gen.Repackage("gen_trieval.go", "trieval.go", "cases")
}
// runeInfo contains all information for a rune that we care about for casing
// operations.
type runeInfo struct {
Rune rune
entry info // trie value for this rune.
CaseMode info
// Simple case mappings.
Simple [1 + maxCaseMode][]rune
// Special casing
HasSpecial bool
Conditional bool
Special [1 + maxCaseMode][]rune
// Folding
FoldSimple rune
FoldSpecial rune
FoldFull []rune
// TODO: FC_NFKC, or equivalent data.
// Properties
SoftDotted bool
CaseIgnorable bool
Cased bool
DecomposeGreek bool
BreakType string
BreakCat breakCategory
// We care mostly about 0, Above, and IotaSubscript.
CCC byte
}
type breakCategory int
const (
breakBreak breakCategory = iota
breakLetter
breakMid
)
// mapping returns the case mapping for the given case type.
func (r *runeInfo) mapping(c info) string {
if r.HasSpecial {
return string(r.Special[c])
}
if len(r.Simple[c]) != 0 {
return string(r.Simple[c])
}
return string(r.Rune)
}
func parse(file string, f func(p *ucd.Parser)) {
ucd.Parse(gen.OpenUCDFile(file), f)
}
func parseUCD() []runeInfo {
chars := make([]runeInfo, unicode.MaxRune)
get := func(r rune) *runeInfo {
c := &chars[r]
c.Rune = r
return c
}
parse("UnicodeData.txt", func(p *ucd.Parser) {
ri := get(p.Rune(0))
ri.CCC = byte(p.Int(ucd.CanonicalCombiningClass))
ri.Simple[cLower] = p.Runes(ucd.SimpleLowercaseMapping)
ri.Simple[cUpper] = p.Runes(ucd.SimpleUppercaseMapping)
ri.Simple[cTitle] = p.Runes(ucd.SimpleTitlecaseMapping)
if p.String(ucd.GeneralCategory) == "Lt" {
ri.CaseMode = cTitle
}
})
// <code>; <property>
parse("PropList.txt", func(p *ucd.Parser) {
if p.String(1) == "Soft_Dotted" {
chars[p.Rune(0)].SoftDotted = true
}
})
// <code>; <word break type>
parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
ri := get(p.Rune(0))
switch p.String(1) {
case "Case_Ignorable":
ri.CaseIgnorable = true
case "Cased":
ri.Cased = true
case "Lowercase":
ri.CaseMode = cLower
case "Uppercase":
ri.CaseMode = cUpper
}
})
// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
parse("SpecialCasing.txt", func(p *ucd.Parser) {
// We drop all conditional special casing and deal with them manually in
// the language-specific case mappers. Rune 0x03A3 is the only one with
// a conditional formatting that is not language-specific. However,
// dealing with this letter is tricky, especially in a streaming
// context, so we deal with it in the Caser for Greek specifically.
ri := get(p.Rune(0))
if p.String(4) == "" {
ri.HasSpecial = true
ri.Special[cLower] = p.Runes(1)
ri.Special[cTitle] = p.Runes(2)
ri.Special[cUpper] = p.Runes(3)
} else {
ri.Conditional = true
}
})
// TODO: Use text breaking according to UAX #29.
// <code>; <word break type>
parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
ri := get(p.Rune(0))
ri.BreakType = p.String(1)
// We collapse the word breaking properties onto the categories we need.
switch p.String(1) { // TODO: officially we need to canonicalize.
case "MidLetter", "MidNumLet", "Single_Quote":
ri.BreakCat = breakMid
if !ri.CaseIgnorable {
// finalSigma relies on the fact that all breakMid runes are
// also a Case_Ignorable. Revisit this code when this changes.
log.Fatalf("Rune %U, which has a break category mid, is not a case ignorable", ri)
}
case "ALetter", "Hebrew_Letter", "Numeric", "Extend", "ExtendNumLet", "Format", "ZWJ":
ri.BreakCat = breakLetter
}
})
// <code>; <type>; <mapping>
parse("CaseFolding.txt", func(p *ucd.Parser) {
ri := get(p.Rune(0))
switch p.String(1) {
case "C":
ri.FoldSimple = p.Rune(2)
ri.FoldFull = p.Runes(2)
case "S":
ri.FoldSimple = p.Rune(2)
case "T":
ri.FoldSpecial = p.Rune(2)
case "F":
ri.FoldFull = p.Runes(2)
default:
log.Fatalf("%U: unknown type: %s", p.Rune(0), p.String(1))
}
})
return chars
}
func genTables() {
chars := parseUCD()
verifyProperties(chars)
t := triegen.NewTrie("case")
for i := range chars {
c := &chars[i]
makeEntry(c)
t.Insert(rune(i), uint64(c.entry))
}
w := gen.NewCodeWriter()
defer w.WriteGoFile("tables.go", "cases")
gen.WriteUnicodeVersion(w)
// TODO: write CLDR version after adding a mechanism to detect that the
// tables on which the manually created locale-sensitive casing code is
// based hasn't changed.
w.WriteVar("xorData", string(xorData))
w.WriteVar("exceptions", string(exceptionData))
sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{}))
if err != nil {
log.Fatal(err)
}
w.Size += sz
}
func makeEntry(ri *runeInfo) {
if ri.CaseIgnorable {
if ri.Cased {
ri.entry = cIgnorableCased
} else {
ri.entry = cIgnorableUncased
}
} else {
ri.entry = ri.CaseMode
}
// TODO: handle soft-dotted.
ccc := cccOther
switch ri.CCC {
case 0: // Not_Reordered
ccc = cccZero
case above: // Above
ccc = cccAbove
}
switch ri.BreakCat {
case breakBreak:
ccc = cccBreak
case breakMid:
ri.entry |= isMidBit
}
ri.entry |= ccc
if ri.CaseMode == cUncased {
return
}
// Need to do something special.
if ri.CaseMode == cTitle || ri.HasSpecial || ri.mapping(cTitle) != ri.mapping(cUpper) {
makeException(ri)
return
}
if f := string(ri.FoldFull); len(f) > 0 && f != ri.mapping(cUpper) && f != ri.mapping(cLower) {
makeException(ri)
return
}
// Rune is either lowercase or uppercase.
orig := string(ri.Rune)
mapped := ""
if ri.CaseMode == cUpper {
mapped = ri.mapping(cLower)
} else {
mapped = ri.mapping(cUpper)
}
if len(orig) != len(mapped) {
makeException(ri)
return
}
if string(ri.FoldFull) == ri.mapping(cUpper) {
ri.entry |= inverseFoldBit
}
n := len(orig)
// Create per-byte XOR mask.
var b []byte
for i := 0; i < n; i++ {
b = append(b, orig[i]^mapped[i])
}
// Remove leading 0 bytes, but keep at least one byte.
for ; len(b) > 1 && b[0] == 0; b = b[1:] {
}
if len(b) == 1 && b[0]&0xc0 == 0 {
ri.entry |= info(b[0]) << xorShift
return
}
key := string(b)
x, ok := xorCache[key]
if !ok {
xorData = append(xorData, 0) // for detecting start of sequence
xorData = append(xorData, b...)
x = len(xorData) - 1
xorCache[key] = x
}
ri.entry |= info(x<<xorShift) | xorIndexBit
}
var xorCache = map[string]int{}
// xorData contains byte-wise XOR data for the least significant bytes of a
// UTF-8 encoded rune. An index points to the last byte. The sequence starts
// with a zero terminator.
var xorData = []byte{}
// See the comments in gen_trieval.go re "the exceptions slice".
var exceptionData = []byte{0}
// makeException encodes case mappings that cannot be expressed in a simple
// XOR diff.
func makeException(ri *runeInfo) {
ccc := ri.entry & cccMask
// Set exception bit and retain case type.
ri.entry &= 0x0007
ri.entry |= exceptionBit
if len(exceptionData) >= 1<<numExceptionBits {
log.Fatalf("%U:exceptionData too large %x > %d bits", ri.Rune, len(exceptionData), numExceptionBits)
}
// Set the offset in the exceptionData array.
ri.entry |= info(len(exceptionData) << exceptionShift)
orig := string(ri.Rune)
tc := ri.mapping(cTitle)
uc := ri.mapping(cUpper)
lc := ri.mapping(cLower)
ff := string(ri.FoldFull)
// addString sets the length of a string and adds it to the expansions array.
addString := func(s string, b *byte) {
if len(s) == 0 {
// Zero-length mappings exist, but only for conditional casing,
// which we are representing outside of this table.
log.Fatalf("%U: has zero-length mapping.", ri.Rune)
}
*b <<= 3
if s != orig {
n := len(s)
if n > 7 {
log.Fatalf("%U: mapping larger than 7 (%d)", ri.Rune, n)
}
*b |= byte(n)
exceptionData = append(exceptionData, s...)
}
}
// byte 0:
exceptionData = append(exceptionData, byte(ccc)|byte(len(ff)))
// byte 1:
p := len(exceptionData)
exceptionData = append(exceptionData, 0)
if len(ff) > 7 { // May be zero-length.
log.Fatalf("%U: fold string larger than 7 (%d)", ri.Rune, len(ff))
}
exceptionData = append(exceptionData, ff...)
ct := ri.CaseMode
if ct != cLower {
addString(lc, &exceptionData[p])
}
if ct != cUpper {
addString(uc, &exceptionData[p])
}
if ct != cTitle {
// If title is the same as upper, we set it to the original string so
// that it will be marked as not present. This implies title case is
// the same as upper case.
if tc == uc {
tc = orig
}
addString(tc, &exceptionData[p])
}
}
// sparseCompacter is a trie value block Compacter. There are many cases where
// successive runes alternate between lower- and upper-case. This Compacter
// exploits this by adding a special case type where the case value is obtained
// from or-ing it with the least-significant bit of the rune, creating large
// ranges of equal case values that compress well.
type sparseCompacter struct {
sparseBlocks [][]uint16
sparseOffsets []uint16
sparseCount int
}
// makeSparse returns the number of elements that compact block would contain
// as well as the modified values.
func makeSparse(vals []uint64) ([]uint16, int) {
// Copy the values.
values := make([]uint16, len(vals))
for i, v := range vals {
values[i] = uint16(v)
}
alt := func(i int, v uint16) uint16 {
if cm := info(v & fullCasedMask); cm == cUpper || cm == cLower {
// Convert cLower or cUpper to cXORCase value, which has the form 11x.
xor := v
xor &^= 1
xor |= uint16(i&1) ^ (v & 1)
xor |= 0x4
return xor
}
return v
}
var count int
var previous uint16
for i, v := range values {
if v != 0 {
// Try if the unmodified value is equal to the previous.
if v == previous {
continue
}
// Try if the xor-ed value is equal to the previous value.
a := alt(i, v)
if a == previous {
values[i] = a
continue
}
// This is a new value.
count++
// Use the xor-ed value if it will be identical to the next value.
if p := i + 1; p < len(values) && alt(p, values[p]) == a {
values[i] = a
v = a
}
}
previous = v
}
return values, count
}
func (s *sparseCompacter) Size(v []uint64) (int, bool) {
_, n := makeSparse(v)
// We limit using this method to having 16 entries.
if n > 16 {
return 0, false
}
return 2 + int(reflect.TypeOf(valueRange{}).Size())*n, true
}
func (s *sparseCompacter) Store(v []uint64) uint32 {
h := uint32(len(s.sparseOffsets))
values, sz := makeSparse(v)
s.sparseBlocks = append(s.sparseBlocks, values)
s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
s.sparseCount += sz
return h
}
func (s *sparseCompacter) Handler() string {
// The sparse global variable and its lookup method is defined in gen_trieval.go.
return "sparse.lookup"
}
func (s *sparseCompacter) Print(w io.Writer) (retErr error) {
p := func(format string, args ...interface{}) {
_, err := fmt.Fprintf(w, format, args...)
if retErr == nil && err != nil {
retErr = err
}
}
ls := len(s.sparseBlocks)
if ls == len(s.sparseOffsets) {
s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
}
p("// sparseOffsets: %d entries, %d bytes\n", ls+1, (ls+1)*2)
p("var sparseOffsets = %#v\n\n", s.sparseOffsets)
ns := s.sparseCount
p("// sparseValues: %d entries, %d bytes\n", ns, ns*4)
p("var sparseValues = [%d]valueRange {", ns)
for i, values := range s.sparseBlocks {
p("\n// Block %#x, offset %#x", i, s.sparseOffsets[i])
var v uint16
for i, nv := range values {
if nv != v {
if v != 0 {
p(",hi:%#02x},", 0x80+i-1)
}
if nv != 0 {
p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
}
}
v = nv
}
if v != 0 {
p(",hi:%#02x},", 0x80+len(values)-1)
}
}
p("\n}\n\n")
return
}
// verifyProperties that properties of the runes that are relied upon in the
// implementation. Each property is marked with an identifier that is referred
// to in the places where it is used.
func verifyProperties(chars []runeInfo) {
for i, c := range chars {
r := rune(i)
// Rune properties.
// A.1: modifier never changes on lowercase. [ltLower]
if c.CCC > 0 && unicode.ToLower(r) != r {
log.Fatalf("%U: non-starter changes when lowercased", r)
}
// A.2: properties of decompositions starting with I or J. [ltLower]
d := norm.NFD.PropertiesString(string(r)).Decomposition()
if len(d) > 0 {
if d[0] == 'I' || d[0] == 'J' {
// A.2.1: we expect at least an ASCII character and a modifier.
if len(d) < 3 {
log.Fatalf("%U: length of decomposition was %d; want >= 3", r, len(d))
}
// All subsequent runes are modifiers and all have the same CCC.
runes := []rune(string(d[1:]))
ccc := chars[runes[0]].CCC
for _, mr := range runes[1:] {
mc := chars[mr]
// A.2.2: all modifiers have a CCC of Above or less.
if ccc == 0 || ccc > above {
log.Fatalf("%U: CCC of successive rune (%U) was %d; want (0,230]", r, mr, ccc)
}
// A.2.3: a sequence of modifiers all have the same CCC.
if mc.CCC != ccc {
log.Fatalf("%U: CCC of follow-up modifier (%U) was %d; want %d", r, mr, mc.CCC, ccc)
}
// A.2.4: for each trailing r, r in [0x300, 0x311] <=> CCC == Above.
if (ccc == above) != (0x300 <= mr && mr <= 0x311) {
log.Fatalf("%U: modifier %U in [U+0300, U+0311] != ccc(%U) == 230", r, mr, mr)
}
if i += len(string(mr)); i >= len(d) {
break
}
}
}
}
// A.3: no U+0307 in decomposition of Soft-Dotted rune. [ltUpper]
if unicode.Is(unicode.Soft_Dotted, r) && strings.Contains(string(d), "\u0307") {
log.Fatalf("%U: decomposition of soft-dotted rune may not contain U+0307", r)
}
// A.4: only rune U+0345 may be of CCC Iota_Subscript. [elUpper]
if c.CCC == iotaSubscript && r != 0x0345 {
log.Fatalf("%U: only rune U+0345 may have CCC Iota_Subscript", r)
}
// A.5: soft-dotted runes do not have exceptions.
if c.SoftDotted && c.entry&exceptionBit != 0 {
log.Fatalf("%U: soft-dotted has exception", r)
}
// A.6: Greek decomposition. [elUpper]
if unicode.Is(unicode.Greek, r) {
if b := norm.NFD.PropertiesString(string(r)).Decomposition(); b != nil {
runes := []rune(string(b))
// A.6.1: If a Greek rune decomposes and the first rune of the
// decomposition is greater than U+00FF, the rune is always
// great and not a modifier.
if f := runes[0]; unicode.IsMark(f) || f > 0xFF && !unicode.Is(unicode.Greek, f) {
log.Fatalf("%U: expeced first rune of Greek decomposition to be letter, found %U", r, f)
}
// A.6.2: Any follow-up rune in a Greek decomposition is a
// modifier of which the first should be gobbled in
// decomposition.
for _, m := range runes[1:] {
switch m {
case 0x0313, 0x0314, 0x0301, 0x0300, 0x0306, 0x0342, 0x0308, 0x0304, 0x345:
default:
log.Fatalf("%U: modifier %U is outside of expeced Greek modifier set", r, m)
}
}
}
}
// Breaking properties.
// B.1: all runes with CCC > 0 are of break type Extend.
if c.CCC > 0 && c.BreakType != "Extend" {
log.Fatalf("%U: CCC == %d, but got break type %s; want Extend", r, c.CCC, c.BreakType)
}
// B.2: all cased runes with c.CCC == 0 are of break type ALetter.
if c.CCC == 0 && c.Cased && c.BreakType != "ALetter" {
log.Fatalf("%U: cased, but got break type %s; want ALetter", r, c.BreakType)
}
// B.3: letter category.
if c.CCC == 0 && c.BreakCat != breakBreak && !c.CaseIgnorable {
if c.BreakCat != breakLetter {
log.Fatalf("%U: check for letter break type gave %d; want %d", r, c.BreakCat, breakLetter)
}
}
}
}
func genTablesTest() {
w := &bytes.Buffer{}
fmt.Fprintln(w, "var (")
printProperties(w, "DerivedCoreProperties.txt", "Case_Ignorable", verifyIgnore)
// We discard the output as we know we have perfect functions. We run them
// just to verify the properties are correct.
n := printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Cased", verifyCased)
n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Lowercase", verifyLower)
n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Uppercase", verifyUpper)
if n > 0 {
log.Fatalf("One of the discarded properties does not have a perfect filter.")
}
// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
fmt.Fprintln(w, "\tspecial = map[rune]struct{ toLower, toTitle, toUpper string }{")
parse("SpecialCasing.txt", func(p *ucd.Parser) {
// Skip conditional entries.
if p.String(4) != "" {
return
}
r := p.Rune(0)
fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n",
r, string(p.Runes(1)), string(p.Runes(2)), string(p.Runes(3)))
})
fmt.Fprint(w, "\t}\n\n")
// <code>; <type>; <runes>
table := map[rune]struct{ simple, full, special string }{}
parse("CaseFolding.txt", func(p *ucd.Parser) {
r := p.Rune(0)
t := p.String(1)
v := string(p.Runes(2))
if t != "T" && v == string(unicode.ToLower(r)) {
return
}
x := table[r]
switch t {
case "C":
x.full = v
x.simple = v
case "S":
x.simple = v
case "F":
x.full = v
case "T":
x.special = v
}
table[r] = x
})
fmt.Fprintln(w, "\tfoldMap = map[rune]struct{ simple, full, special string }{")
for r := rune(0); r < 0x10FFFF; r++ {
x, ok := table[r]
if !ok {
continue
}
fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n", r, x.simple, x.full, x.special)
}
fmt.Fprint(w, "\t}\n\n")
// Break property
notBreak := map[rune]bool{}
parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
switch p.String(1) {
case "Extend", "Format", "MidLetter", "MidNumLet", "Single_Quote",
"ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet", "ZWJ":
notBreak[p.Rune(0)] = true
}
})
fmt.Fprintln(w, "\tbreakProp = []struct{ lo, hi rune }{")
inBreak := false
for r := rune(0); r <= lastRuneForTesting; r++ {
if isBreak := !notBreak[r]; isBreak != inBreak {
if isBreak {
fmt.Fprintf(w, "\t\t{0x%x, ", r)
} else {
fmt.Fprintf(w, "0x%x},\n", r-1)
}
inBreak = isBreak
}
}
if inBreak {
fmt.Fprintf(w, "0x%x},\n", lastRuneForTesting)
}
fmt.Fprint(w, "\t}\n\n")
// Word break test
// Filter out all samples that do not contain cased characters.
cased := map[rune]bool{}
parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
if p.String(1) == "Cased" {
cased[p.Rune(0)] = true
}
})
fmt.Fprintln(w, "\tbreakTest = []string{")
parse("auxiliary/WordBreakTest.txt", func(p *ucd.Parser) {
c := strings.Split(p.String(0), " ")
const sep = '|'
numCased := 0
test := ""
for ; len(c) >= 2; c = c[2:] {
if c[0] == "÷" && test != "" {
test += string(sep)
}
i, err := strconv.ParseUint(c[1], 16, 32)
r := rune(i)
if err != nil {
log.Fatalf("Invalid rune %q.", c[1])
}
if r == sep {
log.Fatalf("Separator %q not allowed in test data. Pick another one.", sep)
}
if cased[r] {
numCased++
}
test += string(r)
}
if numCased > 1 {
fmt.Fprintf(w, "\t\t%q,\n", test)
}
})
fmt.Fprintln(w, "\t}")
fmt.Fprintln(w, ")")
gen.WriteGoFile("tables_test.go", "cases", w.Bytes())
}
// These functions are just used for verification that their definition have not
// changed in the Unicode Standard.
func verifyCased(r rune) bool {
return verifyLower(r) || verifyUpper(r) || unicode.IsTitle(r)
}
func verifyLower(r rune) bool {
return unicode.IsLower(r) || unicode.Is(unicode.Other_Lowercase, r)
}
func verifyUpper(r rune) bool {
return unicode.IsUpper(r) || unicode.Is(unicode.Other_Uppercase, r)
}
// verifyIgnore is an approximation of the Case_Ignorable property using the
// core unicode package. It is used to reduce the size of the test data.
func verifyIgnore(r rune) bool {
props := []*unicode.RangeTable{
unicode.Mn,
unicode.Me,
unicode.Cf,
unicode.Lm,
unicode.Sk,
}
for _, p := range props {
if unicode.Is(p, r) {
return true
}
}
return false
}
// printProperties prints tables of rune properties from the given UCD file.
// A filter func f can be given to exclude certain values. A rune r will have
// the indicated property if it is in the generated table or if f(r).
func printProperties(w io.Writer, file, property string, f func(r rune) bool) int {
verify := map[rune]bool{}
n := 0
varNameParts := strings.Split(property, "_")
varNameParts[0] = strings.ToLower(varNameParts[0])
fmt.Fprintf(w, "\t%s = map[rune]bool{\n", strings.Join(varNameParts, ""))
parse(file, func(p *ucd.Parser) {
if p.String(1) == property {
r := p.Rune(0)
verify[r] = true
if !f(r) {
n++
fmt.Fprintf(w, "\t\t0x%.4x: true,\n", r)
}
}
})
fmt.Fprint(w, "\t}\n\n")
// Verify that f is correct, that is, it represents a subset of the property.
for r := rune(0); r <= lastRuneForTesting; r++ {
if !verify[r] && f(r) {
log.Fatalf("Incorrect filter func for property %q.", property)
}
}
return n
}
// The newCaseTrie, sparseValues and sparseOffsets definitions below are
// placeholders referred to by gen_trieval.go. The real definitions are
// generated by this program and written to tables.go.
func newCaseTrie(int) int { return 0 }
var (
sparseValues [0]valueRange
sparseOffsets [0]uint16
)

219
vendor/golang.org/x/text/cases/gen_trieval.go generated vendored Normal file
View file

@ -0,0 +1,219 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This file contains definitions for interpreting the trie value of the case
// trie generated by "go run gen*.go". It is shared by both the generator
// program and the resultant package. Sharing is achieved by the generator
// copying gen_trieval.go to trieval.go and changing what's above this comment.
// info holds case information for a single rune. It is the value returned
// by a trie lookup. Most mapping information can be stored in a single 16-bit
// value. If not, for example when a rune is mapped to multiple runes, the value
// stores some basic case data and an index into an array with additional data.
//
// The per-rune values have the following format:
//
// if (exception) {
// 15..5 unsigned exception index
// 4 unused
// } else {
// 15..8 XOR pattern or index to XOR pattern for case mapping
// Only 13..8 are used for XOR patterns.
// 7 inverseFold (fold to upper, not to lower)
// 6 index: interpret the XOR pattern as an index
// or isMid if case mode is cIgnorableUncased.
// 5..4 CCC: zero (normal or break), above or other
// }
// 3 exception: interpret this value as an exception index
// (TODO: is this bit necessary? Probably implied from case mode.)
// 2..0 case mode
//
// For the non-exceptional cases, a rune must be either uncased, lowercase or
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
// rune to uppercase or an uppercase rune to lowercase (applied to the 10
// least-significant bits of the rune).
//
// See the definitions below for a more detailed description of the various
// bits.
type info uint16
const (
casedMask = 0x0003
fullCasedMask = 0x0007
ignorableMask = 0x0006
ignorableValue = 0x0004
inverseFoldBit = 1 << 7
isMidBit = 1 << 6
exceptionBit = 1 << 3
exceptionShift = 5
numExceptionBits = 11
xorIndexBit = 1 << 6
xorShift = 8
// There is no mapping if all xor bits and the exception bit are zero.
hasMappingMask = 0xff80 | exceptionBit
)
// The case mode bits encodes the case type of a rune. This includes uncased,
// title, upper and lower case and case ignorable. (For a definition of these
// terms see Chapter 3 of The Unicode Standard Core Specification.) In some rare
// cases, a rune can be both cased and case-ignorable. This is encoded by
// cIgnorableCased. A rune of this type is always lower case. Some runes are
// cased while not having a mapping.
//
// A common pattern for scripts in the Unicode standard is for upper and lower
// case runes to alternate for increasing rune values (e.g. the accented Latin
// ranges starting from U+0100 and U+1E00 among others and some Cyrillic
// characters). We use this property by defining a cXORCase mode, where the case
// mode (always upper or lower case) is derived from the rune value. As the XOR
// pattern for case mappings is often identical for successive runes, using
// cXORCase can result in large series of identical trie values. This, in turn,
// allows us to better compress the trie blocks.
const (
cUncased info = iota // 000
cTitle // 001
cLower // 010
cUpper // 011
cIgnorableUncased // 100
cIgnorableCased // 101 // lower case if mappings exist
cXORCase // 11x // case is cLower | ((rune&1) ^ x)
maxCaseMode = cUpper
)
func (c info) isCased() bool {
return c&casedMask != 0
}
func (c info) isCaseIgnorable() bool {
return c&ignorableMask == ignorableValue
}
func (c info) isNotCasedAndNotCaseIgnorable() bool {
return c&fullCasedMask == 0
}
func (c info) isCaseIgnorableAndNotCased() bool {
return c&fullCasedMask == cIgnorableUncased
}
func (c info) isMid() bool {
return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased
}
// The case mapping implementation will need to know about various Canonical
// Combining Class (CCC) values. We encode two of these in the trie value:
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that
// CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that
// the rune also has the break category Break (see below).
const (
cccBreak info = iota << 4
cccZero
cccAbove
cccOther
cccMask = cccBreak | cccZero | cccAbove | cccOther
)
const (
starter = 0
above = 230
iotaSubscript = 240
)
// The exceptions slice holds data that does not fit in a normal info entry.
// The entry is pointed to by the exception index in an entry. It has the
// following format:
//
// Header
// byte 0:
// 7..6 unused
// 5..4 CCC type (same bits as entry)
// 3 unused
// 2..0 length of fold
//
// byte 1:
// 7..6 unused
// 5..3 length of 1st mapping of case type
// 2..0 length of 2nd mapping of case type
//
// case 1st 2nd
// lower -> upper, title
// upper -> lower, title
// title -> lower, upper
//
// Lengths with the value 0x7 indicate no value and implies no change.
// A length of 0 indicates a mapping to zero-length string.
//
// Body bytes:
// case folding bytes
// lowercase mapping bytes
// uppercase mapping bytes
// titlecase mapping bytes
// closure mapping bytes (for NFKC_Casefold). (TODO)
//
// Fallbacks:
// missing fold -> lower
// missing title -> upper
// all missing -> original rune
//
// exceptions starts with a dummy byte to enforce that there is no zero index
// value.
const (
lengthMask = 0x07
lengthBits = 3
noChange = 0
)
// References to generated trie.
var trie = newCaseTrie(0)
var sparse = sparseBlocks{
values: sparseValues[:],
offsets: sparseOffsets[:],
}
// Sparse block lookup code.
// valueRange is an entry in a sparse block.
type valueRange struct {
value uint16
lo, hi byte
}
type sparseBlocks struct {
values []valueRange
offsets []uint16
}
// lookup returns the value from values block n for byte b using binary search.
func (s *sparseBlocks) lookup(n uint32, b byte) uint16 {
lo := s.offsets[n]
hi := s.offsets[n+1]
for lo < hi {
m := lo + (hi-lo)/2
r := s.values[m]
if r.lo <= b && b <= r.hi {
return r.value
}
if b < r.lo {
hi = m
} else {
lo = m + 1
}
}
return 0
}
// lastRuneForTesting is the last rune used for testing. Everything after this
// is boring.
const lastRuneForTesting = rune(0x1FFFF)

61
vendor/golang.org/x/text/cases/icu.go generated vendored Normal file
View file

@ -0,0 +1,61 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build icu
package cases
// Ideally these functions would be defined in a test file, but go test doesn't
// allow CGO in tests. The build tag should ensure either way that these
// functions will not end up in the package.
// TODO: Ensure that the correct ICU version is set.
/*
#cgo LDFLAGS: -licui18n.57 -licuuc.57
#include <stdlib.h>
#include <unicode/ustring.h>
#include <unicode/utypes.h>
#include <unicode/localpointer.h>
#include <unicode/ucasemap.h>
*/
import "C"
import "unsafe"
func doICU(tag, caser, input string) string {
err := C.UErrorCode(0)
loc := C.CString(tag)
cm := C.ucasemap_open(loc, C.uint32_t(0), &err)
buf := make([]byte, len(input)*4)
dst := (*C.char)(unsafe.Pointer(&buf[0]))
src := C.CString(input)
cn := C.int32_t(0)
switch caser {
case "fold":
cn = C.ucasemap_utf8FoldCase(cm,
dst, C.int32_t(len(buf)),
src, C.int32_t(len(input)),
&err)
case "lower":
cn = C.ucasemap_utf8ToLower(cm,
dst, C.int32_t(len(buf)),
src, C.int32_t(len(input)),
&err)
case "upper":
cn = C.ucasemap_utf8ToUpper(cm,
dst, C.int32_t(len(buf)),
src, C.int32_t(len(input)),
&err)
case "title":
cn = C.ucasemap_utf8ToTitle(cm,
dst, C.int32_t(len(buf)),
src, C.int32_t(len(input)),
&err)
}
return string(buf[:cn])
}

82
vendor/golang.org/x/text/cases/info.go generated vendored Normal file
View file

@ -0,0 +1,82 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
func (c info) cccVal() info {
if c&exceptionBit != 0 {
return info(exceptions[c>>exceptionShift]) & cccMask
}
return c & cccMask
}
func (c info) cccType() info {
ccc := c.cccVal()
if ccc <= cccZero {
return cccZero
}
return ccc
}
// TODO: Implement full Unicode breaking algorithm:
// 1) Implement breaking in separate package.
// 2) Use the breaker here.
// 3) Compare table size and performance of using the more generic breaker.
//
// Note that we can extend the current algorithm to be much more accurate. This
// only makes sense, though, if the performance and/or space penalty of using
// the generic breaker is big. Extra data will only be needed for non-cased
// runes, which means there are sufficient bits left in the caseType.
// ICU prohibits breaking in such cases as well.
// For the purpose of title casing we use an approximation of the Unicode Word
// Breaking algorithm defined in Annex #29:
// http://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table.
//
// For our approximation, we group the Word Break types into the following
// categories, with associated rules:
//
// 1) Letter:
// ALetter, Hebrew_Letter, Numeric, ExtendNumLet, Extend, Format_FE, ZWJ.
// Rule: Never break between consecutive runes of this category.
//
// 2) Mid:
// MidLetter, MidNumLet, Single_Quote.
// (Cf. case-ignorable: MidLetter, MidNumLet, Single_Quote or cat is Mn,
// Me, Cf, Lm or Sk).
// Rule: Don't break between Letter and Mid, but break between two Mids.
//
// 3) Break:
// Any other category: NewLine, MidNum, CR, LF, Double_Quote, Katakana, and
// Other.
// These categories should always result in a break between two cased letters.
// Rule: Always break.
//
// Note 1: the Katakana and MidNum categories can, in esoteric cases, result in
// preventing a break between two cased letters. For now we will ignore this
// (e.g. [ALetter] [ExtendNumLet] [Katakana] [ExtendNumLet] [ALetter] and
// [ALetter] [Numeric] [MidNum] [Numeric] [ALetter].)
//
// Note 2: the rule for Mid is very approximate, but works in most cases. To
// improve, we could store the categories in the trie value and use a FA to
// manage breaks. See TODO comment above.
//
// Note 3: according to the spec, it is possible for the Extend category to
// introduce breaks between other categories grouped in Letter. However, this
// is undesirable for our purposes. ICU prevents breaks in such cases as well.
// isBreak returns whether this rune should introduce a break.
func (c info) isBreak() bool {
return c.cccVal() == cccBreak
}
// isLetter returns whether the rune is of break type ALetter, Hebrew_Letter,
// Numeric, ExtendNumLet, or Extend.
func (c info) isLetter() bool {
ccc := c.cccVal()
if ccc == cccZero {
return !c.isCaseIgnorable()
}
return ccc != cccBreak
}

816
vendor/golang.org/x/text/cases/map.go generated vendored Normal file
View file

@ -0,0 +1,816 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
// This file contains the definitions of case mappings for all supported
// languages. The rules for the language-specific tailorings were taken and
// modified from the CLDR transform definitions in common/transforms.
import (
"strings"
"unicode"
"unicode/utf8"
"golang.org/x/text/internal"
"golang.org/x/text/language"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
// A mapFunc takes a context set to the current rune and writes the mapped
// version to the same context. It may advance the context to the next rune. It
// returns whether a checkpoint is possible: whether the pDst bytes written to
// dst so far won't need changing as we see more source bytes.
type mapFunc func(*context) bool
// A spanFunc takes a context set to the current rune and returns whether this
// rune would be altered when written to the output. It may advance the context
// to the next rune. It returns whether a checkpoint is possible.
type spanFunc func(*context) bool
// maxIgnorable defines the maximum number of ignorables to consider for
// lookahead operations.
const maxIgnorable = 30
// supported lists the language tags for which we have tailorings.
const supported = "und af az el lt nl tr"
func init() {
tags := []language.Tag{}
for _, s := range strings.Split(supported, " ") {
tags = append(tags, language.MustParse(s))
}
matcher = internal.NewInheritanceMatcher(tags)
Supported = language.NewCoverage(tags)
}
var (
matcher *internal.InheritanceMatcher
Supported language.Coverage
// We keep the following lists separate, instead of having a single per-
// language struct, to give the compiler a chance to remove unused code.
// Some uppercase mappers are stateless, so we can precompute the
// Transformers and save a bit on runtime allocations.
upperFunc = []struct {
upper mapFunc
span spanFunc
}{
{nil, nil}, // und
{nil, nil}, // af
{aztrUpper(upper), isUpper}, // az
{elUpper, noSpan}, // el
{ltUpper(upper), noSpan}, // lt
{nil, nil}, // nl
{aztrUpper(upper), isUpper}, // tr
}
undUpper transform.SpanningTransformer = &undUpperCaser{}
undLower transform.SpanningTransformer = &undLowerCaser{}
undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}
lowerFunc = []mapFunc{
nil, // und
nil, // af
aztrLower, // az
nil, // el
ltLower, // lt
nil, // nl
aztrLower, // tr
}
titleInfos = []struct {
title mapFunc
lower mapFunc
titleSpan spanFunc
rewrite func(*context)
}{
{title, lower, isTitle, nil}, // und
{title, lower, isTitle, afnlRewrite}, // af
{aztrUpper(title), aztrLower, isTitle, nil}, // az
{title, lower, isTitle, nil}, // el
{ltUpper(title), ltLower, noSpan, nil}, // lt
{nlTitle, lower, nlTitleSpan, afnlRewrite}, // nl
{aztrUpper(title), aztrLower, isTitle, nil}, // tr
}
)
func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
_, i, _ := matcher.Match(t)
f := upperFunc[i].upper
if f == nil {
return undUpper
}
return &simpleCaser{f: f, span: upperFunc[i].span}
}
func makeLower(t language.Tag, o options) transform.SpanningTransformer {
_, i, _ := matcher.Match(t)
f := lowerFunc[i]
if f == nil {
if o.ignoreFinalSigma {
return undLowerIgnoreSigma
}
return undLower
}
if o.ignoreFinalSigma {
return &simpleCaser{f: f, span: isLower}
}
return &lowerCaser{
first: f,
midWord: finalSigma(f),
}
}
func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
_, i, _ := matcher.Match(t)
x := &titleInfos[i]
lower := x.lower
if o.noLower {
lower = (*context).copy
} else if !o.ignoreFinalSigma {
lower = finalSigma(lower)
}
return &titleCaser{
title: x.title,
lower: lower,
titleSpan: x.titleSpan,
rewrite: x.rewrite,
}
}
func noSpan(c *context) bool {
c.err = transform.ErrEndOfSpan
return false
}
// TODO: consider a similar special case for the fast majority lower case. This
// is a bit more involved so will require some more precise benchmarking to
// justify it.
type undUpperCaser struct{ transform.NopResetter }
// undUpperCaser implements the Transformer interface for doing an upper case
// mapping for the root locale (und). It eliminates the need for an allocation
// as it prevents escaping by not using function pointers.
func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for c.next() {
upper(&c)
c.checkpoint()
}
return c.ret()
}
func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && isUpper(&c) {
c.checkpoint()
}
return c.retSpan()
}
// undLowerIgnoreSigmaCaser implements the Transformer interface for doing
// a lower case mapping for the root locale (und) ignoring final sigma
// handling. This casing algorithm is used in some performance-critical packages
// like secure/precis and x/net/http/idna, which warrants its special-casing.
type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }
func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for c.next() && lower(&c) {
c.checkpoint()
}
return c.ret()
}
// Span implements a generic lower-casing. This is possible as isLower works
// for all lowercasing variants. All lowercase variants only vary in how they
// transform a non-lowercase letter. They will never change an already lowercase
// letter. In addition, there is no state.
func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && isLower(&c) {
c.checkpoint()
}
return c.retSpan()
}
type simpleCaser struct {
context
f mapFunc
span spanFunc
}
// simpleCaser implements the Transformer interface for doing a case operation
// on a rune-by-rune basis.
func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for c.next() && t.f(&c) {
c.checkpoint()
}
return c.ret()
}
func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && t.span(&c) {
c.checkpoint()
}
return c.retSpan()
}
// undLowerCaser implements the Transformer interface for doing a lower case
// mapping for the root locale (und) ignoring final sigma handling. This casing
// algorithm is used in some performance-critical packages like secure/precis
// and x/net/http/idna, which warrants its special-casing.
type undLowerCaser struct{ transform.NopResetter }
func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for isInterWord := true; c.next(); {
if isInterWord {
if c.info.isCased() {
if !lower(&c) {
break
}
isInterWord = false
} else if !c.copy() {
break
}
} else {
if c.info.isNotCasedAndNotCaseIgnorable() {
if !c.copy() {
break
}
isInterWord = true
} else if !c.hasPrefix("Σ") {
if !lower(&c) {
break
}
} else if !finalSigmaBody(&c) {
break
}
}
c.checkpoint()
}
return c.ret()
}
func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && isLower(&c) {
c.checkpoint()
}
return c.retSpan()
}
// lowerCaser implements the Transformer interface. The default Unicode lower
// casing requires different treatment for the first and subsequent characters
// of a word, most notably to handle the Greek final Sigma.
type lowerCaser struct {
undLowerIgnoreSigmaCaser
context
first, midWord mapFunc
}
func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
t.context = context{dst: dst, src: src, atEOF: atEOF}
c := &t.context
for isInterWord := true; c.next(); {
if isInterWord {
if c.info.isCased() {
if !t.first(c) {
break
}
isInterWord = false
} else if !c.copy() {
break
}
} else {
if c.info.isNotCasedAndNotCaseIgnorable() {
if !c.copy() {
break
}
isInterWord = true
} else if !t.midWord(c) {
break
}
}
c.checkpoint()
}
return c.ret()
}
// titleCaser implements the Transformer interface. Title casing algorithms
// distinguish between the first letter of a word and subsequent letters of the
// same word. It uses state to avoid requiring a potentially infinite lookahead.
type titleCaser struct {
context
// rune mappings used by the actual casing algorithms.
title mapFunc
lower mapFunc
titleSpan spanFunc
rewrite func(*context)
}
// Transform implements the standard Unicode title case algorithm as defined in
// Chapter 3 of The Unicode Standard:
// toTitlecase(X): Find the word boundaries in X according to Unicode Standard
// Annex #29, "Unicode Text Segmentation." For each word boundary, find the
// first cased character F following the word boundary. If F exists, map F to
// Titlecase_Mapping(F); then map all characters C between F and the following
// word boundary to Lowercase_Mapping(C).
func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
c := &t.context
if !c.next() {
return c.ret()
}
for {
p := c.info
if t.rewrite != nil {
t.rewrite(c)
}
wasMid := p.isMid()
// Break out of this loop on failure to ensure we do not modify the
// state incorrectly.
if p.isCased() {
if !c.isMidWord {
if !t.title(c) {
break
}
c.isMidWord = true
} else if !t.lower(c) {
break
}
} else if !c.copy() {
break
} else if p.isBreak() {
c.isMidWord = false
}
// As we save the state of the transformer, it is safe to call
// checkpoint after any successful write.
if !(c.isMidWord && wasMid) {
c.checkpoint()
}
if !c.next() {
break
}
if wasMid && c.info.isMid() {
c.isMidWord = false
}
}
return c.ret()
}
func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
c := &t.context
if !c.next() {
return c.retSpan()
}
for {
p := c.info
if t.rewrite != nil {
t.rewrite(c)
}
wasMid := p.isMid()
// Break out of this loop on failure to ensure we do not modify the
// state incorrectly.
if p.isCased() {
if !c.isMidWord {
if !t.titleSpan(c) {
break
}
c.isMidWord = true
} else if !isLower(c) {
break
}
} else if p.isBreak() {
c.isMidWord = false
}
// As we save the state of the transformer, it is safe to call
// checkpoint after any successful write.
if !(c.isMidWord && wasMid) {
c.checkpoint()
}
if !c.next() {
break
}
if wasMid && c.info.isMid() {
c.isMidWord = false
}
}
return c.retSpan()
}
// finalSigma adds Greek final Sigma handing to another casing function. It
// determines whether a lowercased sigma should be σ or ς, by looking ahead for
// case-ignorables and a cased letters.
func finalSigma(f mapFunc) mapFunc {
return func(c *context) bool {
if !c.hasPrefix("Σ") {
return f(c)
}
return finalSigmaBody(c)
}
}
func finalSigmaBody(c *context) bool {
// Current rune must be ∑.
// ::NFD();
// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
// Σ } [:case-ignorable:]* [:cased:] → σ;
// [:cased:] [:case-ignorable:]* { Σ → ς;
// ::Any-Lower;
// ::NFC();
p := c.pDst
c.writeString("ς")
// TODO: we should do this here, but right now this will never have an
// effect as this is called when the prefix is Sigma, whereas Dutch and
// Afrikaans only test for an apostrophe.
//
// if t.rewrite != nil {
// t.rewrite(c)
// }
// We need to do one more iteration after maxIgnorable, as a cased
// letter is not an ignorable and may modify the result.
wasMid := false
for i := 0; i < maxIgnorable+1; i++ {
if !c.next() {
return false
}
if !c.info.isCaseIgnorable() {
// All Midword runes are also case ignorable, so we are
// guaranteed to have a letter or word break here. As we are
// unreading the run, there is no need to unset c.isMidWord;
// the title caser will handle this.
if c.info.isCased() {
// p+1 is guaranteed to be in bounds: if writing ς was
// successful, p+1 will contain the second byte of ς. If not,
// this function will have returned after c.next returned false.
c.dst[p+1]++ // ς → σ
}
c.unreadRune()
return true
}
// A case ignorable may also introduce a word break, so we may need
// to continue searching even after detecting a break.
isMid := c.info.isMid()
if (wasMid && isMid) || c.info.isBreak() {
c.isMidWord = false
}
wasMid = isMid
c.copy()
}
return true
}
// finalSigmaSpan would be the same as isLower.
// elUpper implements Greek upper casing, which entails removing a predefined
// set of non-blocked modifiers. Note that these accents should not be removed
// for title casing!
// Example: "Οδός" -> "ΟΔΟΣ".
func elUpper(c *context) bool {
// From CLDR:
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
oldPDst := c.pDst
if !upper(c) {
return false
}
if !unicode.Is(unicode.Greek, r) {
return true
}
i := 0
// Take the properties of the uppercased rune that is already written to the
// destination. This saves us the trouble of having to uppercase the
// decomposed rune again.
if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
// Restore the destination position and process the decomposed rune.
r, sz := utf8.DecodeRune(b)
if r <= 0xFF { // See A.6.1
return true
}
c.pDst = oldPDst
// Insert the first rune and ignore the modifiers. See A.6.2.
c.writeBytes(b[:sz])
i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.
}
for ; i < maxIgnorable && c.next(); i++ {
switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {
// Above and Iota Subscript
case 0x0300, // U+0300 COMBINING GRAVE ACCENT
0x0301, // U+0301 COMBINING ACUTE ACCENT
0x0304, // U+0304 COMBINING MACRON
0x0306, // U+0306 COMBINING BREVE
0x0308, // U+0308 COMBINING DIAERESIS
0x0313, // U+0313 COMBINING COMMA ABOVE
0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
0x0342, // U+0342 COMBINING GREEK PERISPOMENI
0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
// No-op. Gobble the modifier.
default:
switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
case cccZero:
c.unreadRune()
return true
// We don't need to test for IotaSubscript as the only rune that
// qualifies (U+0345) was already excluded in the switch statement
// above. See A.4.
case cccAbove:
return c.copy()
default:
// Some other modifier. We're still allowed to gobble Greek
// modifiers after this.
c.copy()
}
}
}
return i == maxIgnorable
}
// TODO: implement elUpperSpan (low-priority: complex and infrequent).
func ltLower(c *context) bool {
// From CLDR:
// # Introduce an explicit dot above when lowercasing capital I's and J's
// # whenever there are more accents above.
// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
// ::NFD();
// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
// I \u0300 (Ì) → i \u0307 \u0300;
// I \u0301 (Í) → i \u0307 \u0301;
// I \u0303 (Ĩ) → i \u0307 \u0303;
// ::Any-Lower();
// ::NFC();
i := 0
if r := c.src[c.pSrc]; r < utf8.RuneSelf {
lower(c)
if r != 'I' && r != 'J' {
return true
}
} else {
p := norm.NFD.Properties(c.src[c.pSrc:])
if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
// UTF-8 optimization: the decomposition will only have an above
// modifier if the last rune of the decomposition is in [U+300-U+311].
// In all other cases, a decomposition starting with I is always
// an I followed by modifiers that are not cased themselves. See A.2.
if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.
if !c.writeBytes(d[:1]) {
return false
}
c.dst[c.pDst-1] += 'a' - 'A' // lower
// Assumption: modifier never changes on lowercase. See A.1.
// Assumption: all modifiers added have CCC = Above. See A.2.3.
return c.writeString("\u0307") && c.writeBytes(d[1:])
}
// In all other cases the additional modifiers will have a CCC
// that is less than 230 (Above). We will insert the U+0307, if
// needed, after these modifiers so that a string in FCD form
// will remain so. See A.2.2.
lower(c)
i = 1
} else {
return lower(c)
}
}
for ; i < maxIgnorable && c.next(); i++ {
switch c.info.cccType() {
case cccZero:
c.unreadRune()
return true
case cccAbove:
return c.writeString("\u0307") && c.copy() // See A.1.
default:
c.copy() // See A.1.
}
}
return i == maxIgnorable
}
// ltLowerSpan would be the same as isLower.
func ltUpper(f mapFunc) mapFunc {
return func(c *context) bool {
// Unicode:
// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
//
// From CLDR:
// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible
// # intervening non-230 marks.
// ::NFD();
// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
// ::Any-Upper();
// ::NFC();
// TODO: See A.5. A soft-dotted rune never has an exception. This would
// allow us to overload the exception bit and encode this property in
// info. Need to measure performance impact of this.
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
oldPDst := c.pDst
if !f(c) {
return false
}
if !unicode.Is(unicode.Soft_Dotted, r) {
return true
}
// We don't need to do an NFD normalization, as a soft-dotted rune never
// contains U+0307. See A.3.
i := 0
for ; i < maxIgnorable && c.next(); i++ {
switch c.info.cccType() {
case cccZero:
c.unreadRune()
return true
case cccAbove:
if c.hasPrefix("\u0307") {
// We don't do a full NFC, but rather combine runes for
// some of the common cases. (Returning NFC or
// preserving normal form is neither a requirement nor
// a possibility anyway).
if !c.next() {
return false
}
if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {
s := ""
switch c.src[c.pSrc+1] {
case 0x80: // U+0300 COMBINING GRAVE ACCENT
s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
case 0x81: // U+0301 COMBINING ACUTE ACCENT
s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
case 0x83: // U+0303 COMBINING TILDE
s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
case 0x88: // U+0308 COMBINING DIAERESIS
s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
default:
}
if s != "" {
c.pDst = oldPDst
return c.writeString(s)
}
}
}
return c.copy()
default:
c.copy()
}
}
return i == maxIgnorable
}
}
// TODO: implement ltUpperSpan (low priority: complex and infrequent).
func aztrUpper(f mapFunc) mapFunc {
return func(c *context) bool {
// i→İ;
if c.src[c.pSrc] == 'i' {
return c.writeString("İ")
}
return f(c)
}
}
func aztrLower(c *context) (done bool) {
// From CLDR:
// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
// İ→i;
// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
// # This matches the behavior of the canonically equivalent I-dot_above
// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;
// I→ı ;
// ::Any-Lower();
if c.hasPrefix("\u0130") { // İ
return c.writeString("i")
}
if c.src[c.pSrc] != 'I' {
return lower(c)
}
// We ignore the lower-case I for now, but insert it later when we know
// which form we need.
start := c.pSrc + c.sz
i := 0
Loop:
// We check for up to n ignorables before \u0307. As \u0307 is an
// ignorable as well, n is maxIgnorable-1.
for ; i < maxIgnorable && c.next(); i++ {
switch c.info.cccType() {
case cccAbove:
if c.hasPrefix("\u0307") {
return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307
}
done = true
break Loop
case cccZero:
c.unreadRune()
done = true
break Loop
default:
// We'll write this rune after we know which starter to use.
}
}
if i == maxIgnorable {
done = true
}
return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
}
// aztrLowerSpan would be the same as isLower.
func nlTitle(c *context) bool {
// From CLDR:
// # Special titlecasing for Dutch initial "ij".
// ::Any-Title();
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' {
return title(c)
}
if !c.writeString("I") || !c.next() {
return false
}
if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' {
return c.writeString("J")
}
c.unreadRune()
return true
}
func nlTitleSpan(c *context) bool {
// From CLDR:
// # Special titlecasing for Dutch initial "ij".
// ::Any-Title();
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
if c.src[c.pSrc] != 'I' {
return isTitle(c)
}
if !c.next() || c.src[c.pSrc] == 'j' {
return false
}
if c.src[c.pSrc] != 'J' {
c.unreadRune()
}
return true
}
// Not part of CLDR, but see http://unicode.org/cldr/trac/ticket/7078.
func afnlRewrite(c *context) {
if c.hasPrefix("'") || c.hasPrefix("") {
c.isMidWord = true
}
}

2211
vendor/golang.org/x/text/cases/tables.go generated vendored Normal file

File diff suppressed because it is too large Load diff

215
vendor/golang.org/x/text/cases/trieval.go generated vendored Normal file
View file

@ -0,0 +1,215 @@
// This file was generated by go generate; DO NOT EDIT
package cases
// This file contains definitions for interpreting the trie value of the case
// trie generated by "go run gen*.go". It is shared by both the generator
// program and the resultant package. Sharing is achieved by the generator
// copying gen_trieval.go to trieval.go and changing what's above this comment.
// info holds case information for a single rune. It is the value returned
// by a trie lookup. Most mapping information can be stored in a single 16-bit
// value. If not, for example when a rune is mapped to multiple runes, the value
// stores some basic case data and an index into an array with additional data.
//
// The per-rune values have the following format:
//
// if (exception) {
// 15..5 unsigned exception index
// 4 unused
// } else {
// 15..8 XOR pattern or index to XOR pattern for case mapping
// Only 13..8 are used for XOR patterns.
// 7 inverseFold (fold to upper, not to lower)
// 6 index: interpret the XOR pattern as an index
// or isMid if case mode is cIgnorableUncased.
// 5..4 CCC: zero (normal or break), above or other
// }
// 3 exception: interpret this value as an exception index
// (TODO: is this bit necessary? Probably implied from case mode.)
// 2..0 case mode
//
// For the non-exceptional cases, a rune must be either uncased, lowercase or
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
// rune to uppercase or an uppercase rune to lowercase (applied to the 10
// least-significant bits of the rune).
//
// See the definitions below for a more detailed description of the various
// bits.
type info uint16
const (
casedMask = 0x0003
fullCasedMask = 0x0007
ignorableMask = 0x0006
ignorableValue = 0x0004
inverseFoldBit = 1 << 7
isMidBit = 1 << 6
exceptionBit = 1 << 3
exceptionShift = 5
numExceptionBits = 11
xorIndexBit = 1 << 6
xorShift = 8
// There is no mapping if all xor bits and the exception bit are zero.
hasMappingMask = 0xff80 | exceptionBit
)
// The case mode bits encodes the case type of a rune. This includes uncased,
// title, upper and lower case and case ignorable. (For a definition of these
// terms see Chapter 3 of The Unicode Standard Core Specification.) In some rare
// cases, a rune can be both cased and case-ignorable. This is encoded by
// cIgnorableCased. A rune of this type is always lower case. Some runes are
// cased while not having a mapping.
//
// A common pattern for scripts in the Unicode standard is for upper and lower
// case runes to alternate for increasing rune values (e.g. the accented Latin
// ranges starting from U+0100 and U+1E00 among others and some Cyrillic
// characters). We use this property by defining a cXORCase mode, where the case
// mode (always upper or lower case) is derived from the rune value. As the XOR
// pattern for case mappings is often identical for successive runes, using
// cXORCase can result in large series of identical trie values. This, in turn,
// allows us to better compress the trie blocks.
const (
cUncased info = iota // 000
cTitle // 001
cLower // 010
cUpper // 011
cIgnorableUncased // 100
cIgnorableCased // 101 // lower case if mappings exist
cXORCase // 11x // case is cLower | ((rune&1) ^ x)
maxCaseMode = cUpper
)
func (c info) isCased() bool {
return c&casedMask != 0
}
func (c info) isCaseIgnorable() bool {
return c&ignorableMask == ignorableValue
}
func (c info) isNotCasedAndNotCaseIgnorable() bool {
return c&fullCasedMask == 0
}
func (c info) isCaseIgnorableAndNotCased() bool {
return c&fullCasedMask == cIgnorableUncased
}
func (c info) isMid() bool {
return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased
}
// The case mapping implementation will need to know about various Canonical
// Combining Class (CCC) values. We encode two of these in the trie value:
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that
// CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that
// the rune also has the break category Break (see below).
const (
cccBreak info = iota << 4
cccZero
cccAbove
cccOther
cccMask = cccBreak | cccZero | cccAbove | cccOther
)
const (
starter = 0
above = 230
iotaSubscript = 240
)
// The exceptions slice holds data that does not fit in a normal info entry.
// The entry is pointed to by the exception index in an entry. It has the
// following format:
//
// Header
// byte 0:
// 7..6 unused
// 5..4 CCC type (same bits as entry)
// 3 unused
// 2..0 length of fold
//
// byte 1:
// 7..6 unused
// 5..3 length of 1st mapping of case type
// 2..0 length of 2nd mapping of case type
//
// case 1st 2nd
// lower -> upper, title
// upper -> lower, title
// title -> lower, upper
//
// Lengths with the value 0x7 indicate no value and implies no change.
// A length of 0 indicates a mapping to zero-length string.
//
// Body bytes:
// case folding bytes
// lowercase mapping bytes
// uppercase mapping bytes
// titlecase mapping bytes
// closure mapping bytes (for NFKC_Casefold). (TODO)
//
// Fallbacks:
// missing fold -> lower
// missing title -> upper
// all missing -> original rune
//
// exceptions starts with a dummy byte to enforce that there is no zero index
// value.
const (
lengthMask = 0x07
lengthBits = 3
noChange = 0
)
// References to generated trie.
var trie = newCaseTrie(0)
var sparse = sparseBlocks{
values: sparseValues[:],
offsets: sparseOffsets[:],
}
// Sparse block lookup code.
// valueRange is an entry in a sparse block.
type valueRange struct {
value uint16
lo, hi byte
}
type sparseBlocks struct {
values []valueRange
offsets []uint16
}
// lookup returns the value from values block n for byte b using binary search.
func (s *sparseBlocks) lookup(n uint32, b byte) uint16 {
lo := s.offsets[n]
hi := s.offsets[n+1]
for lo < hi {
m := lo + (hi-lo)/2
r := s.values[m]
if r.lo <= b && b <= r.hi {
return r.value
}
if b < r.lo {
hi = m
} else {
lo = m + 1
}
}
return 0
}
// lastRuneForTesting is the last rune used for testing. Everything after this
// is boring.
const lastRuneForTesting = rune(0x1FFFF)