1
0
Fork 0

Update libkv dependency

This commit is contained in:
NicoMen 2017-11-17 17:22:03 +01:00 committed by Traefiker
parent cdab6b1796
commit 66e489addb
237 changed files with 62817 additions and 16116 deletions

View file

@ -35,7 +35,7 @@ import (
// A Caser may be stateful and should therefore not be shared between
// goroutines.
type Caser struct {
t transform.Transformer
t transform.SpanningTransformer
}
// Bytes returns a new byte slice with the result of converting b to the case
@ -56,12 +56,17 @@ func (c Caser) String(s string) string {
// Transform.
func (c Caser) Reset() { c.t.Reset() }
// Transform implements the Transformer interface and transforms the given input
// to the case form implemented by c.
// Transform implements the transform.Transformer interface and transforms the
// given input to the case form implemented by c.
func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return c.t.Transform(dst, src, atEOF)
}
// Span implements the transform.SpanningTransformer interface.
func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
return c.t.Span(src, atEOF)
}
// Upper returns a Caser for language-specific uppercasing.
func Upper(t language.Tag, opts ...Option) Caser {
return Caser{makeUpper(t, getOpts(opts...))}
@ -83,14 +88,20 @@ func Title(t language.Tag, opts ...Option) Caser {
//
// Case folding does not normalize the input and may not preserve a normal form.
// Use the collate or search package for more convenient and linguistically
// sound comparisons. Use unicode/precis for string comparisons where security
// aspects are a concern.
// sound comparisons. Use golang.org/x/text/secure/precis for string comparisons
// where security aspects are a concern.
func Fold(opts ...Option) Caser {
return Caser{makeFold(getOpts(opts...))}
}
// An Option is used to modify the behavior of a Caser.
type Option func(o *options)
type Option func(o options) options
// TODO: consider these options to take a boolean as well, like FinalSigma.
// The advantage of using this approach is that other providers of a lower-case
// algorithm could set different defaults by prefixing a user-provided slice
// of options with their own. This is handy, for instance, for the precis
// package which would override the default to not handle the Greek final sigma.
var (
// NoLower disables the lowercasing of non-leading letters for a title
@ -110,20 +121,42 @@ type options struct {
// TODO: segmenter, max ignorable, alternative versions, etc.
noFinalSigma bool // Only used for testing.
ignoreFinalSigma bool
}
func getOpts(o ...Option) (res options) {
for _, f := range o {
f(&res)
res = f(res)
}
return
}
func noLower(o *options) {
func noLower(o options) options {
o.noLower = true
return o
}
func compact(o *options) {
func compact(o options) options {
o.simple = true
return o
}
// HandleFinalSigma specifies whether the special handling of Greek final sigma
// should be enabled. Unicode prescribes handling the Greek final sigma for all
// locales, but standards like IDNA and PRECIS override this default.
func HandleFinalSigma(enable bool) Option {
if enable {
return handleFinalSigma
}
return ignoreFinalSigma
}
func ignoreFinalSigma(o options) options {
o.ignoreFinalSigma = true
return o
}
func handleFinalSigma(o options) options {
o.ignoreFinalSigma = false
return o
}

View file

@ -4,9 +4,7 @@
package cases
import (
"golang.org/x/text/transform"
)
import "golang.org/x/text/transform"
// A context is used for iterating over source bytes, fetching case info and
// writing to a destination buffer.
@ -56,6 +54,14 @@ func (c *context) ret() (nDst, nSrc int, err error) {
return c.nDst, c.nSrc, transform.ErrShortSrc
}
// retSpan returns the return values for the Span method. It checks whether
// there were insufficient bytes in src to complete and introduces an error
// accordingly, if necessary.
func (c *context) retSpan() (n int, err error) {
_, nSrc, err := c.ret()
return nSrc, err
}
// checkpoint sets the return value buffer points for Transform to the current
// positions.
func (c *context) checkpoint() {
@ -200,6 +206,23 @@ func lower(c *context) bool {
return c.copy()
}
func isLower(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cLower {
return true
}
if c.info&exceptionBit == 0 {
c.err = transform.ErrEndOfSpan
return false
}
e := exceptions[c.info>>exceptionShift:]
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
// upper writes the uppercase version of the current rune to dst.
func upper(c *context) bool {
ct := c.caseType()
@ -226,6 +249,29 @@ func upper(c *context) bool {
return c.copy()
}
// isUpper writes the isUppercase version of the current rune to dst.
func isUpper(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cUpper {
return true
}
if c.info&exceptionBit == 0 {
c.err = transform.ErrEndOfSpan
return false
}
e := exceptions[c.info>>exceptionShift:]
// Get length of first special case mapping.
n := (e[1] >> lengthBits) & lengthMask
if ct == cTitle {
n = e[1] & lengthMask
}
if n != noChange {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
// title writes the title case version of the current rune to dst.
func title(c *context) bool {
ct := c.caseType()
@ -257,6 +303,33 @@ func title(c *context) bool {
return c.copy()
}
// isTitle reports whether the current rune is in title case.
func isTitle(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cTitle {
return true
}
if c.info&exceptionBit == 0 {
if ct == cLower {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
// Get the exception data.
e := exceptions[c.info>>exceptionShift:]
if nTitle := e[1] & lengthMask; nTitle != noChange {
c.err = transform.ErrEndOfSpan
return false
}
nFirst := (e[1] >> lengthBits) & lengthMask
if ct == cLower && nFirst != noChange {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
// foldFull writes the foldFull version of the current rune to dst.
func foldFull(c *context) bool {
if c.info&hasMappingMask == 0 {
@ -279,3 +352,25 @@ func foldFull(c *context) bool {
}
return c.writeString(e[2 : 2+n])
}
// isFoldFull reports whether the current run is mapped to foldFull
func isFoldFull(c *context) bool {
if c.info&hasMappingMask == 0 {
return true
}
ct := c.caseType()
if c.info&exceptionBit == 0 {
if ct != cLower || c.info&inverseFoldBit != 0 {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
e := exceptions[c.info>>exceptionShift:]
n := e[0] & lengthMask
if n == 0 && ct == cLower {
return true
}
c.err = transform.ErrEndOfSpan
return false
}

View file

@ -18,7 +18,15 @@ func (t *caseFolder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err
return c.ret()
}
func makeFold(o options) transform.Transformer {
func (t *caseFolder) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && isFoldFull(&c) {
c.checkpoint()
}
return c.retSpan()
}
func makeFold(o options) transform.SpanningTransformer {
// TODO: Special case folding, through option Language, Special/Turkic, or
// both.
// TODO: Implement Compact options.

View file

@ -76,7 +76,7 @@ type breakCategory int
const (
breakBreak breakCategory = iota
breakLetter
breakIgnored
breakMid
)
// mapping returns the case mapping for the given case type.
@ -162,9 +162,14 @@ func parseUCD() []runeInfo {
// We collapse the word breaking properties onto the categories we need.
switch p.String(1) { // TODO: officially we need to canonicalize.
case "Format", "MidLetter", "MidNumLet", "Single_Quote":
ri.BreakCat = breakIgnored
case "ALetter", "Hebrew_Letter", "Numeric", "Extend", "ExtendNumLet":
case "MidLetter", "MidNumLet", "Single_Quote":
ri.BreakCat = breakMid
if !ri.CaseIgnorable {
// finalSigma relies on the fact that all breakMid runes are
// also a Case_Ignorable. Revisit this code when this changes.
log.Fatalf("Rune %U, which has a break category mid, is not a case ignorable", ri)
}
case "ALetter", "Hebrew_Letter", "Numeric", "Extend", "ExtendNumLet", "Format", "ZWJ":
ri.BreakCat = breakLetter
}
})
@ -240,8 +245,11 @@ func makeEntry(ri *runeInfo) {
case above: // Above
ccc = cccAbove
}
if ri.BreakCat == breakBreak {
switch ri.BreakCat {
case breakBreak:
ccc = cccBreak
case breakMid:
ri.entry |= isMidBit
}
ri.entry |= ccc
@ -690,7 +698,7 @@ func genTablesTest() {
parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
switch p.String(1) {
case "Extend", "Format", "MidLetter", "MidNumLet", "Single_Quote",
"ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet":
"ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet", "ZWJ":
notBreak[p.Rune(0)] = true
}
})

View file

@ -26,6 +26,7 @@ package main
// Only 13..8 are used for XOR patterns.
// 7 inverseFold (fold to upper, not to lower)
// 6 index: interpret the XOR pattern as an index
// or isMid if case mode is cIgnorableUncased.
// 5..4 CCC: zero (normal or break), above or other
// }
// 3 exception: interpret this value as an exception index
@ -48,6 +49,7 @@ const (
ignorableValue = 0x0004
inverseFoldBit = 1 << 7
isMidBit = 1 << 6
exceptionBit = 1 << 3
exceptionShift = 5
@ -57,7 +59,7 @@ const (
xorShift = 8
// There is no mapping if all xor bits and the exception bit are zero.
hasMappingMask = 0xffc0 | exceptionBit
hasMappingMask = 0xff80 | exceptionBit
)
// The case mode bits encodes the case type of a rune. This includes uncased,
@ -95,10 +97,6 @@ func (c info) isCaseIgnorable() bool {
return c&ignorableMask == ignorableValue
}
func (c info) isCaseIgnorableAndNonBreakStarter() bool {
return c&(fullCasedMask|cccMask) == (ignorableValue | cccZero)
}
func (c info) isNotCasedAndNotCaseIgnorable() bool {
return c&fullCasedMask == 0
}
@ -107,6 +105,10 @@ func (c info) isCaseIgnorableAndNotCased() bool {
return c&fullCasedMask == cIgnorableUncased
}
func (c info) isMid() bool {
return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased
}
// The case mapping implementation will need to know about various Canonical
// Combining Class (CCC) values. We encode two of these in the trie value:
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that

61
vendor/golang.org/x/text/cases/icu.go generated vendored Normal file
View file

@ -0,0 +1,61 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build icu
package cases
// Ideally these functions would be defined in a test file, but go test doesn't
// allow CGO in tests. The build tag should ensure either way that these
// functions will not end up in the package.
// TODO: Ensure that the correct ICU version is set.
/*
#cgo LDFLAGS: -licui18n.57 -licuuc.57
#include <stdlib.h>
#include <unicode/ustring.h>
#include <unicode/utypes.h>
#include <unicode/localpointer.h>
#include <unicode/ucasemap.h>
*/
import "C"
import "unsafe"
func doICU(tag, caser, input string) string {
err := C.UErrorCode(0)
loc := C.CString(tag)
cm := C.ucasemap_open(loc, C.uint32_t(0), &err)
buf := make([]byte, len(input)*4)
dst := (*C.char)(unsafe.Pointer(&buf[0]))
src := C.CString(input)
cn := C.int32_t(0)
switch caser {
case "fold":
cn = C.ucasemap_utf8FoldCase(cm,
dst, C.int32_t(len(buf)),
src, C.int32_t(len(input)),
&err)
case "lower":
cn = C.ucasemap_utf8ToLower(cm,
dst, C.int32_t(len(buf)),
src, C.int32_t(len(input)),
&err)
case "upper":
cn = C.ucasemap_utf8ToUpper(cm,
dst, C.int32_t(len(buf)),
src, C.int32_t(len(input)),
&err)
case "title":
cn = C.ucasemap_utf8ToTitle(cm,
dst, C.int32_t(len(buf)),
src, C.int32_t(len(input)),
&err)
}
return string(buf[:cn])
}

View file

@ -28,9 +28,6 @@ func (c info) cccType() info {
// only makes sense, though, if the performance and/or space penalty of using
// the generic breaker is big. Extra data will only be needed for non-cased
// runes, which means there are sufficient bits left in the caseType.
// Also note that the standard breaking algorithm doesn't always make sense
// for title casing. For example, a4a -> A4a, but a"4a -> A"4A (where " stands
// for modifier \u0308).
// ICU prohibits breaking in such cases as well.
// For the purpose of title casing we use an approximation of the Unicode Word
@ -41,17 +38,19 @@ func (c info) cccType() info {
// categories, with associated rules:
//
// 1) Letter:
// ALetter, Hebrew_Letter, Numeric, ExtendNumLet, Extend.
// ALetter, Hebrew_Letter, Numeric, ExtendNumLet, Extend, Format_FE, ZWJ.
// Rule: Never break between consecutive runes of this category.
//
// 2) Mid:
// Format, MidLetter, MidNumLet, Single_Quote.
// (Cf. case-ignorable: MidLetter, MidNumLet or cat is Mn, Me, Cf, Lm or Sk).
// MidLetter, MidNumLet, Single_Quote.
// (Cf. case-ignorable: MidLetter, MidNumLet, Single_Quote or cat is Mn,
// Me, Cf, Lm or Sk).
// Rule: Don't break between Letter and Mid, but break between two Mids.
//
// 3) Break:
// Any other category, including NewLine, CR, LF and Double_Quote. These
// categories should always result in a break between two cased letters.
// Any other category: NewLine, MidNum, CR, LF, Double_Quote, Katakana, and
// Other.
// These categories should always result in a break between two cased letters.
// Rule: Always break.
//
// Note 1: the Katakana and MidNum categories can, in esoteric cases, result in

389
vendor/golang.org/x/text/cases/map.go generated vendored
View file

@ -13,6 +13,7 @@ import (
"unicode"
"unicode/utf8"
"golang.org/x/text/internal"
"golang.org/x/text/language"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
@ -24,6 +25,11 @@ import (
// dst so far won't need changing as we see more source bytes.
type mapFunc func(*context) bool
// A spanFunc takes a context set to the current rune and returns whether this
// rune would be altered when written to the output. It may advance the context
// to the next rune. It returns whether a checkpoint is possible.
type spanFunc func(*context) bool
// maxIgnorable defines the maximum number of ignorables to consider for
// lookahead operations.
const maxIgnorable = 30
@ -36,12 +42,12 @@ func init() {
for _, s := range strings.Split(supported, " ") {
tags = append(tags, language.MustParse(s))
}
matcher = language.NewMatcher(tags)
matcher = internal.NewInheritanceMatcher(tags)
Supported = language.NewCoverage(tags)
}
var (
matcher language.Matcher
matcher *internal.InheritanceMatcher
Supported language.Coverage
@ -50,56 +56,69 @@ var (
// Some uppercase mappers are stateless, so we can precompute the
// Transformers and save a bit on runtime allocations.
upperFunc = []mapFunc{
nil, // und
nil, // af
aztrUpper(upper), // az
elUpper, // el
ltUpper(upper), // lt
nil, // nl
aztrUpper(upper), // tr
upperFunc = []struct {
upper mapFunc
span spanFunc
}{
{nil, nil}, // und
{nil, nil}, // af
{aztrUpper(upper), isUpper}, // az
{elUpper, noSpan}, // el
{ltUpper(upper), noSpan}, // lt
{nil, nil}, // nl
{aztrUpper(upper), isUpper}, // tr
}
undUpper transform.Transformer = &undUpperCaser{}
undUpper transform.SpanningTransformer = &undUpperCaser{}
undLower transform.SpanningTransformer = &undLowerCaser{}
undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}
lowerFunc = []mapFunc{
lower, // und
lower, // af
nil, // und
nil, // af
aztrLower, // az
lower, // el
nil, // el
ltLower, // lt
lower, // nl
nil, // nl
aztrLower, // tr
}
titleInfos = []struct {
title, lower mapFunc
rewrite func(*context)
title mapFunc
lower mapFunc
titleSpan spanFunc
rewrite func(*context)
}{
{title, lower, nil}, // und
{title, lower, afnlRewrite}, // af
{aztrUpper(title), aztrLower, nil}, // az
{title, lower, nil}, // el
{ltUpper(title), ltLower, nil}, // lt
{nlTitle, lower, afnlRewrite}, // nl
{aztrUpper(title), aztrLower, nil}, // tr
{title, lower, isTitle, nil}, // und
{title, lower, isTitle, afnlRewrite}, // af
{aztrUpper(title), aztrLower, isTitle, nil}, // az
{title, lower, isTitle, nil}, // el
{ltUpper(title), ltLower, noSpan, nil}, // lt
{nlTitle, lower, nlTitleSpan, afnlRewrite}, // nl
{aztrUpper(title), aztrLower, isTitle, nil}, // tr
}
)
func makeUpper(t language.Tag, o options) transform.Transformer {
func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
_, i, _ := matcher.Match(t)
f := upperFunc[i]
f := upperFunc[i].upper
if f == nil {
return undUpper
}
return &simpleCaser{f: f}
return &simpleCaser{f: f, span: upperFunc[i].span}
}
func makeLower(t language.Tag, o options) transform.Transformer {
func makeLower(t language.Tag, o options) transform.SpanningTransformer {
_, i, _ := matcher.Match(t)
f := lowerFunc[i]
if o.noFinalSigma {
return &simpleCaser{f: f}
if f == nil {
if o.ignoreFinalSigma {
return undLowerIgnoreSigma
}
return undLower
}
if o.ignoreFinalSigma {
return &simpleCaser{f: f, span: isLower}
}
return &lowerCaser{
first: f,
@ -107,22 +126,28 @@ func makeLower(t language.Tag, o options) transform.Transformer {
}
}
func makeTitle(t language.Tag, o options) transform.Transformer {
func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
_, i, _ := matcher.Match(t)
x := &titleInfos[i]
lower := x.lower
if o.noLower {
lower = (*context).copy
} else if !o.noFinalSigma {
} else if !o.ignoreFinalSigma {
lower = finalSigma(lower)
}
return &titleCaser{
title: x.title,
lower: lower,
rewrite: x.rewrite,
title: x.title,
lower: lower,
titleSpan: x.titleSpan,
rewrite: x.rewrite,
}
}
func noSpan(c *context) bool {
c.err = transform.ErrEndOfSpan
return false
}
// TODO: consider a similar special case for the fast majority lower case. This
// is a bit more involved so will require some more precise benchmarking to
// justify it.
@ -132,7 +157,7 @@ type undUpperCaser struct{ transform.NopResetter }
// undUpperCaser implements the Transformer interface for doing an upper case
// mapping for the root locale (und). It eliminates the need for an allocation
// as it prevents escaping by not using function pointers.
func (t *undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for c.next() {
upper(&c)
@ -141,26 +166,117 @@ func (t *undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int,
return c.ret()
}
func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && isUpper(&c) {
c.checkpoint()
}
return c.retSpan()
}
// undLowerIgnoreSigmaCaser implements the Transformer interface for doing
// a lower case mapping for the root locale (und) ignoring final sigma
// handling. This casing algorithm is used in some performance-critical packages
// like secure/precis and x/net/http/idna, which warrants its special-casing.
type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }
func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for c.next() && lower(&c) {
c.checkpoint()
}
return c.ret()
}
// Span implements a generic lower-casing. This is possible as isLower works
// for all lowercasing variants. All lowercase variants only vary in how they
// transform a non-lowercase letter. They will never change an already lowercase
// letter. In addition, there is no state.
func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && isLower(&c) {
c.checkpoint()
}
return c.retSpan()
}
type simpleCaser struct {
context
f mapFunc
f mapFunc
span spanFunc
}
// simpleCaser implements the Transformer interface for doing a case operation
// on a rune-by-rune basis.
func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
t.context = context{dst: dst, src: src, atEOF: atEOF}
c := &t.context
for c.next() && t.f(c) {
c := context{dst: dst, src: src, atEOF: atEOF}
for c.next() && t.f(&c) {
c.checkpoint()
}
return c.ret()
}
func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && t.span(&c) {
c.checkpoint()
}
return c.retSpan()
}
// undLowerCaser implements the Transformer interface for doing a lower case
// mapping for the root locale (und) ignoring final sigma handling. This casing
// algorithm is used in some performance-critical packages like secure/precis
// and x/net/http/idna, which warrants its special-casing.
type undLowerCaser struct{ transform.NopResetter }
func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for isInterWord := true; c.next(); {
if isInterWord {
if c.info.isCased() {
if !lower(&c) {
break
}
isInterWord = false
} else if !c.copy() {
break
}
} else {
if c.info.isNotCasedAndNotCaseIgnorable() {
if !c.copy() {
break
}
isInterWord = true
} else if !c.hasPrefix("Σ") {
if !lower(&c) {
break
}
} else if !finalSigmaBody(&c) {
break
}
}
c.checkpoint()
}
return c.ret()
}
func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && isLower(&c) {
c.checkpoint()
}
return c.retSpan()
}
// lowerCaser implements the Transformer interface. The default Unicode lower
// casing requires different treatment for the first and subsequent characters
// of a word, most notably to handle the Greek final Sigma.
type lowerCaser struct {
undLowerIgnoreSigmaCaser
context
first, midWord mapFunc
@ -202,7 +318,9 @@ type titleCaser struct {
context
// rune mappings used by the actual casing algorithms.
title, lower mapFunc
title mapFunc
lower mapFunc
titleSpan spanFunc
rewrite func(*context)
}
@ -228,10 +346,10 @@ func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err
t.rewrite(c)
}
wasMid := p.isCaseIgnorableAndNonBreakStarter()
wasMid := p.isMid()
// Break out of this loop on failure to ensure we do not modify the
// state incorrectly.
if p.isCased() && !p.isCaseIgnorableAndNotCased() {
if p.isCased() {
if !c.isMidWord {
if !t.title(c) {
break
@ -242,71 +360,139 @@ func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err
}
} else if !c.copy() {
break
}
// TODO: make this an "else if" if we can prove that no rune that does
// not match the first condition of the if statement can be a break.
if p.isBreak() {
} else if p.isBreak() {
c.isMidWord = false
}
// As we save the state of the transformer, it is safe to call
// checkpoint after any successful write.
c.checkpoint()
if !(c.isMidWord && wasMid) {
c.checkpoint()
}
if !c.next() {
break
}
if wasMid && c.info.isCaseIgnorableAndNonBreakStarter() {
if wasMid && c.info.isMid() {
c.isMidWord = false
}
}
return c.ret()
}
func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
c := &t.context
if !c.next() {
return c.retSpan()
}
for {
p := c.info
if t.rewrite != nil {
t.rewrite(c)
}
wasMid := p.isMid()
// Break out of this loop on failure to ensure we do not modify the
// state incorrectly.
if p.isCased() {
if !c.isMidWord {
if !t.titleSpan(c) {
break
}
c.isMidWord = true
} else if !isLower(c) {
break
}
} else if p.isBreak() {
c.isMidWord = false
}
// As we save the state of the transformer, it is safe to call
// checkpoint after any successful write.
if !(c.isMidWord && wasMid) {
c.checkpoint()
}
if !c.next() {
break
}
if wasMid && c.info.isMid() {
c.isMidWord = false
}
}
return c.retSpan()
}
// finalSigma adds Greek final Sigma handing to another casing function. It
// determines whether a lowercased sigma should be σ or ς, by looking ahead for
// case-ignorables and a cased letters.
func finalSigma(f mapFunc) mapFunc {
return func(c *context) bool {
// ::NFD();
// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
// Σ } [:case-ignorable:]* [:cased:] → σ;
// [:cased:] [:case-ignorable:]* { Σ → ς;
// ::Any-Lower;
// ::NFC();
if !c.hasPrefix("Σ") {
return f(c)
}
p := c.pDst
c.writeString("ς")
// We need to do one more iteration after maxIgnorable, as a cased
// letter is not an ignorable and may modify the result.
for i := 0; i < maxIgnorable+1; i++ {
if !c.next() {
return false
}
if !c.info.isCaseIgnorable() {
if c.info.isCased() {
// p+1 is guaranteed to be in bounds: if writing ς was
// successful, p+1 will contain the second byte of ς. If not,
// this function will have returned after c.next returned false.
c.dst[p+1]++ // ς → σ
}
c.unreadRune()
return true
}
// A case ignorable may also introduce a word break, so we may need
// to continue searching even after detecting a break.
c.isMidWord = c.isMidWord && !c.info.isBreak()
c.copy()
}
return true
return finalSigmaBody(c)
}
}
func finalSigmaBody(c *context) bool {
// Current rune must be ∑.
// ::NFD();
// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
// Σ } [:case-ignorable:]* [:cased:] → σ;
// [:cased:] [:case-ignorable:]* { Σ → ς;
// ::Any-Lower;
// ::NFC();
p := c.pDst
c.writeString("ς")
// TODO: we should do this here, but right now this will never have an
// effect as this is called when the prefix is Sigma, whereas Dutch and
// Afrikaans only test for an apostrophe.
//
// if t.rewrite != nil {
// t.rewrite(c)
// }
// We need to do one more iteration after maxIgnorable, as a cased
// letter is not an ignorable and may modify the result.
wasMid := false
for i := 0; i < maxIgnorable+1; i++ {
if !c.next() {
return false
}
if !c.info.isCaseIgnorable() {
// All Midword runes are also case ignorable, so we are
// guaranteed to have a letter or word break here. As we are
// unreading the run, there is no need to unset c.isMidWord;
// the title caser will handle this.
if c.info.isCased() {
// p+1 is guaranteed to be in bounds: if writing ς was
// successful, p+1 will contain the second byte of ς. If not,
// this function will have returned after c.next returned false.
c.dst[p+1]++ // ς → σ
}
c.unreadRune()
return true
}
// A case ignorable may also introduce a word break, so we may need
// to continue searching even after detecting a break.
isMid := c.info.isMid()
if (wasMid && isMid) || c.info.isBreak() {
c.isMidWord = false
}
wasMid = isMid
c.copy()
}
return true
}
// finalSigmaSpan would be the same as isLower.
// elUpper implements Greek upper casing, which entails removing a predefined
// set of non-blocked modifiers. Note that these accents should not be removed
// for title casing!
@ -376,6 +562,8 @@ func elUpper(c *context) bool {
return i == maxIgnorable
}
// TODO: implement elUpperSpan (low-priority: complex and infrequent).
func ltLower(c *context) bool {
// From CLDR:
// # Introduce an explicit dot above when lowercasing capital I's and J's
@ -390,10 +578,10 @@ func ltLower(c *context) bool {
// ::NFD();
// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
// Į } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → į \u0307;
// Ì → i \u0307 \u0300;
// Í → i \u0307 \u0301;
// Ĩ → i \u0307 \u0303;
// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
// I \u0300 (Ì) → i \u0307 \u0300;
// I \u0301 (Í) → i \u0307 \u0301;
// I \u0303 (Ĩ) → i \u0307 \u0303;
// ::Any-Lower();
// ::NFC();
@ -445,9 +633,16 @@ func ltLower(c *context) bool {
return i == maxIgnorable
}
// ltLowerSpan would be the same as isLower.
func ltUpper(f mapFunc) mapFunc {
return func(c *context) bool {
// Unicode:
// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
//
// From CLDR:
// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible
// # intervening non-230 marks.
// ::NFD();
// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
// ::Any-Upper();
@ -511,6 +706,8 @@ func ltUpper(f mapFunc) mapFunc {
}
}
// TODO: implement ltUpperSpan (low priority: complex and infrequent).
func aztrUpper(f mapFunc) mapFunc {
return func(c *context) bool {
// i→İ;
@ -571,6 +768,8 @@ Loop:
return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
}
// aztrLowerSpan would be the same as isLower.
func nlTitle(c *context) bool {
// From CLDR:
// # Special titlecasing for Dutch initial "ij".
@ -591,6 +790,24 @@ func nlTitle(c *context) bool {
return true
}
func nlTitleSpan(c *context) bool {
// From CLDR:
// # Special titlecasing for Dutch initial "ij".
// ::Any-Title();
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
if c.src[c.pSrc] != 'I' {
return isTitle(c)
}
if !c.next() || c.src[c.pSrc] == 'j' {
return false
}
if c.src[c.pSrc] != 'J' {
c.unreadRune()
}
return true
}
// Not part of CLDR, but see http://unicode.org/cldr/trac/ticket/7078.
func afnlRewrite(c *context) {
if c.hasPrefix("'") || c.hasPrefix("") {

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
// This file was generated by go generate; DO NOT EDIT
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package cases
@ -22,6 +22,7 @@ package cases
// Only 13..8 are used for XOR patterns.
// 7 inverseFold (fold to upper, not to lower)
// 6 index: interpret the XOR pattern as an index
// or isMid if case mode is cIgnorableUncased.
// 5..4 CCC: zero (normal or break), above or other
// }
// 3 exception: interpret this value as an exception index
@ -44,6 +45,7 @@ const (
ignorableValue = 0x0004
inverseFoldBit = 1 << 7
isMidBit = 1 << 6
exceptionBit = 1 << 3
exceptionShift = 5
@ -53,7 +55,7 @@ const (
xorShift = 8
// There is no mapping if all xor bits and the exception bit are zero.
hasMappingMask = 0xffc0 | exceptionBit
hasMappingMask = 0xff80 | exceptionBit
)
// The case mode bits encodes the case type of a rune. This includes uncased,
@ -91,10 +93,6 @@ func (c info) isCaseIgnorable() bool {
return c&ignorableMask == ignorableValue
}
func (c info) isCaseIgnorableAndNonBreakStarter() bool {
return c&(fullCasedMask|cccMask) == (ignorableValue | cccZero)
}
func (c info) isNotCasedAndNotCaseIgnorable() bool {
return c&fullCasedMask == 0
}
@ -103,6 +101,10 @@ func (c info) isCaseIgnorableAndNotCased() bool {
return c&fullCasedMask == cIgnorableUncased
}
func (c info) isMid() bool {
return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased
}
// The case mapping implementation will need to know about various Canonical
// Combining Class (CCC) values. We encode two of these in the trie value:
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that