1
0
Fork 0

Upgrade k8s.io/client-go to version 2

This commit is contained in:
Ed Robinson 2017-04-07 11:49:53 +01:00
parent a3b95f798b
commit 6f4c5dd4ce
No known key found for this signature in database
GPG key ID: EC501FCA6421CCF0
675 changed files with 109006 additions and 90744 deletions

View file

@ -35,7 +35,7 @@ import (
// A Caser may be stateful and should therefore not be shared between
// goroutines.
type Caser struct {
t transform.SpanningTransformer
t transform.Transformer
}
// Bytes returns a new byte slice with the result of converting b to the case
@ -56,17 +56,12 @@ func (c Caser) String(s string) string {
// Transform.
func (c Caser) Reset() { c.t.Reset() }
// Transform implements the transform.Transformer interface and transforms the
// given input to the case form implemented by c.
// Transform implements the Transformer interface and transforms the given input
// to the case form implemented by c.
func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return c.t.Transform(dst, src, atEOF)
}
// Span implements the transform.SpanningTransformer interface.
func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
return c.t.Span(src, atEOF)
}
// Upper returns a Caser for language-specific uppercasing.
func Upper(t language.Tag, opts ...Option) Caser {
return Caser{makeUpper(t, getOpts(opts...))}
@ -95,13 +90,7 @@ func Fold(opts ...Option) Caser {
}
// An Option is used to modify the behavior of a Caser.
type Option func(o options) options
// TODO: consider these options to take a boolean as well, like FinalSigma.
// The advantage of using this approach is that other providers of a lower-case
// algorithm could set different defaults by prefixing a user-provided slice
// of options with their own. This is handy, for instance, for the precis
// package which would override the default to not handle the Greek final sigma.
type Option func(o *options)
var (
// NoLower disables the lowercasing of non-leading letters for a title
@ -121,42 +110,20 @@ type options struct {
// TODO: segmenter, max ignorable, alternative versions, etc.
ignoreFinalSigma bool
noFinalSigma bool // Only used for testing.
}
func getOpts(o ...Option) (res options) {
for _, f := range o {
res = f(res)
f(&res)
}
return
}
func noLower(o options) options {
func noLower(o *options) {
o.noLower = true
return o
}
func compact(o options) options {
func compact(o *options) {
o.simple = true
return o
}
// HandleFinalSigma specifies whether the special handling of Greek final sigma
// should be enabled. Unicode prescribes handling the Greek final sigma for all
// locales, but standards like IDNA and PRECIS override this default.
func HandleFinalSigma(enable bool) Option {
if enable {
return handleFinalSigma
}
return ignoreFinalSigma
}
func ignoreFinalSigma(o options) options {
o.ignoreFinalSigma = true
return o
}
func handleFinalSigma(o options) options {
o.ignoreFinalSigma = false
return o
}

View file

@ -4,7 +4,9 @@
package cases
import "golang.org/x/text/transform"
import (
"golang.org/x/text/transform"
)
// A context is used for iterating over source bytes, fetching case info and
// writing to a destination buffer.
@ -54,14 +56,6 @@ func (c *context) ret() (nDst, nSrc int, err error) {
return c.nDst, c.nSrc, transform.ErrShortSrc
}
// retSpan returns the return values for the Span method. It checks whether
// there were insufficient bytes in src to complete and introduces an error
// accordingly, if necessary.
func (c *context) retSpan() (n int, err error) {
_, nSrc, err := c.ret()
return nSrc, err
}
// checkpoint sets the return value buffer points for Transform to the current
// positions.
func (c *context) checkpoint() {
@ -206,23 +200,6 @@ func lower(c *context) bool {
return c.copy()
}
func isLower(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cLower {
return true
}
if c.info&exceptionBit == 0 {
c.err = transform.ErrEndOfSpan
return false
}
e := exceptions[c.info>>exceptionShift:]
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
// upper writes the uppercase version of the current rune to dst.
func upper(c *context) bool {
ct := c.caseType()
@ -249,29 +226,6 @@ func upper(c *context) bool {
return c.copy()
}
// isUpper writes the isUppercase version of the current rune to dst.
func isUpper(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cUpper {
return true
}
if c.info&exceptionBit == 0 {
c.err = transform.ErrEndOfSpan
return false
}
e := exceptions[c.info>>exceptionShift:]
// Get length of first special case mapping.
n := (e[1] >> lengthBits) & lengthMask
if ct == cTitle {
n = e[1] & lengthMask
}
if n != noChange {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
// title writes the title case version of the current rune to dst.
func title(c *context) bool {
ct := c.caseType()
@ -303,33 +257,6 @@ func title(c *context) bool {
return c.copy()
}
// isTitle reports whether the current rune is in title case.
func isTitle(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cTitle {
return true
}
if c.info&exceptionBit == 0 {
if ct == cLower {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
// Get the exception data.
e := exceptions[c.info>>exceptionShift:]
if nTitle := e[1] & lengthMask; nTitle != noChange {
c.err = transform.ErrEndOfSpan
return false
}
nFirst := (e[1] >> lengthBits) & lengthMask
if ct == cLower && nFirst != noChange {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
// foldFull writes the foldFull version of the current rune to dst.
func foldFull(c *context) bool {
if c.info&hasMappingMask == 0 {
@ -352,25 +279,3 @@ func foldFull(c *context) bool {
}
return c.writeString(e[2 : 2+n])
}
// isFoldFull reports whether the current run is mapped to foldFull
func isFoldFull(c *context) bool {
if c.info&hasMappingMask == 0 {
return true
}
ct := c.caseType()
if c.info&exceptionBit == 0 {
if ct != cLower || c.info&inverseFoldBit != 0 {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
e := exceptions[c.info>>exceptionShift:]
n := e[0] & lengthMask
if n == 0 && ct == cLower {
return true
}
c.err = transform.ErrEndOfSpan
return false
}

View file

@ -18,15 +18,7 @@ func (t *caseFolder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err
return c.ret()
}
func (t *caseFolder) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && isFoldFull(&c) {
c.checkpoint()
}
return c.retSpan()
}
func makeFold(o options) transform.SpanningTransformer {
func makeFold(o options) transform.Transformer {
// TODO: Special case folding, through option Language, Special/Turkic, or
// both.
// TODO: Implement Compact options.

View file

@ -76,7 +76,7 @@ type breakCategory int
const (
breakBreak breakCategory = iota
breakLetter
breakMid
breakIgnored
)
// mapping returns the case mapping for the given case type.
@ -162,14 +162,9 @@ func parseUCD() []runeInfo {
// We collapse the word breaking properties onto the categories we need.
switch p.String(1) { // TODO: officially we need to canonicalize.
case "MidLetter", "MidNumLet", "Single_Quote":
ri.BreakCat = breakMid
if !ri.CaseIgnorable {
// finalSigma relies on the fact that all breakMid runes are
// also a Case_Ignorable. Revisit this code when this changes.
log.Fatalf("Rune %U, which has a break category mid, is not a case ignorable", ri)
}
case "ALetter", "Hebrew_Letter", "Numeric", "Extend", "ExtendNumLet", "Format", "ZWJ":
case "Format", "MidLetter", "MidNumLet", "Single_Quote":
ri.BreakCat = breakIgnored
case "ALetter", "Hebrew_Letter", "Numeric", "Extend", "ExtendNumLet":
ri.BreakCat = breakLetter
}
})
@ -245,11 +240,8 @@ func makeEntry(ri *runeInfo) {
case above: // Above
ccc = cccAbove
}
switch ri.BreakCat {
case breakBreak:
if ri.BreakCat == breakBreak {
ccc = cccBreak
case breakMid:
ri.entry |= isMidBit
}
ri.entry |= ccc
@ -698,7 +690,7 @@ func genTablesTest() {
parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
switch p.String(1) {
case "Extend", "Format", "MidLetter", "MidNumLet", "Single_Quote",
"ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet", "ZWJ":
"ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet":
notBreak[p.Rune(0)] = true
}
})

View file

@ -26,7 +26,6 @@ package main
// Only 13..8 are used for XOR patterns.
// 7 inverseFold (fold to upper, not to lower)
// 6 index: interpret the XOR pattern as an index
// or isMid if case mode is cIgnorableUncased.
// 5..4 CCC: zero (normal or break), above or other
// }
// 3 exception: interpret this value as an exception index
@ -49,7 +48,6 @@ const (
ignorableValue = 0x0004
inverseFoldBit = 1 << 7
isMidBit = 1 << 6
exceptionBit = 1 << 3
exceptionShift = 5
@ -59,7 +57,7 @@ const (
xorShift = 8
// There is no mapping if all xor bits and the exception bit are zero.
hasMappingMask = 0xff80 | exceptionBit
hasMappingMask = 0xffc0 | exceptionBit
)
// The case mode bits encodes the case type of a rune. This includes uncased,
@ -97,6 +95,10 @@ func (c info) isCaseIgnorable() bool {
return c&ignorableMask == ignorableValue
}
func (c info) isCaseIgnorableAndNonBreakStarter() bool {
return c&(fullCasedMask|cccMask) == (ignorableValue | cccZero)
}
func (c info) isNotCasedAndNotCaseIgnorable() bool {
return c&fullCasedMask == 0
}
@ -105,10 +107,6 @@ func (c info) isCaseIgnorableAndNotCased() bool {
return c&fullCasedMask == cIgnorableUncased
}
func (c info) isMid() bool {
return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased
}
// The case mapping implementation will need to know about various Canonical
// Combining Class (CCC) values. We encode two of these in the trie value:
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that

View file

@ -1,61 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build icu
package cases
// Ideally these functions would be defined in a test file, but go test doesn't
// allow CGO in tests. The build tag should ensure either way that these
// functions will not end up in the package.
// TODO: Ensure that the correct ICU version is set.
/*
#cgo LDFLAGS: -licui18n.57 -licuuc.57
#include <stdlib.h>
#include <unicode/ustring.h>
#include <unicode/utypes.h>
#include <unicode/localpointer.h>
#include <unicode/ucasemap.h>
*/
import "C"
import "unsafe"
func doICU(tag, caser, input string) string {
err := C.UErrorCode(0)
loc := C.CString(tag)
cm := C.ucasemap_open(loc, C.uint32_t(0), &err)
buf := make([]byte, len(input)*4)
dst := (*C.char)(unsafe.Pointer(&buf[0]))
src := C.CString(input)
cn := C.int32_t(0)
switch caser {
case "fold":
cn = C.ucasemap_utf8FoldCase(cm,
dst, C.int32_t(len(buf)),
src, C.int32_t(len(input)),
&err)
case "lower":
cn = C.ucasemap_utf8ToLower(cm,
dst, C.int32_t(len(buf)),
src, C.int32_t(len(input)),
&err)
case "upper":
cn = C.ucasemap_utf8ToUpper(cm,
dst, C.int32_t(len(buf)),
src, C.int32_t(len(input)),
&err)
case "title":
cn = C.ucasemap_utf8ToTitle(cm,
dst, C.int32_t(len(buf)),
src, C.int32_t(len(input)),
&err)
}
return string(buf[:cn])
}

View file

@ -28,6 +28,9 @@ func (c info) cccType() info {
// only makes sense, though, if the performance and/or space penalty of using
// the generic breaker is big. Extra data will only be needed for non-cased
// runes, which means there are sufficient bits left in the caseType.
// Also note that the standard breaking algorithm doesn't always make sense
// for title casing. For example, a4a -> A4a, but a"4a -> A"4A (where " stands
// for modifier \u0308).
// ICU prohibits breaking in such cases as well.
// For the purpose of title casing we use an approximation of the Unicode Word
@ -38,19 +41,17 @@ func (c info) cccType() info {
// categories, with associated rules:
//
// 1) Letter:
// ALetter, Hebrew_Letter, Numeric, ExtendNumLet, Extend, Format_FE, ZWJ.
// ALetter, Hebrew_Letter, Numeric, ExtendNumLet, Extend.
// Rule: Never break between consecutive runes of this category.
//
// 2) Mid:
// MidLetter, MidNumLet, Single_Quote.
// (Cf. case-ignorable: MidLetter, MidNumLet, Single_Quote or cat is Mn,
// Me, Cf, Lm or Sk).
// Format, MidLetter, MidNumLet, Single_Quote.
// (Cf. case-ignorable: MidLetter, MidNumLet or cat is Mn, Me, Cf, Lm or Sk).
// Rule: Don't break between Letter and Mid, but break between two Mids.
//
// 3) Break:
// Any other category: NewLine, MidNum, CR, LF, Double_Quote, Katakana, and
// Other.
// These categories should always result in a break between two cased letters.
// Any other category, including NewLine, CR, LF and Double_Quote. These
// categories should always result in a break between two cased letters.
// Rule: Always break.
//
// Note 1: the Katakana and MidNum categories can, in esoteric cases, result in

383
vendor/golang.org/x/text/cases/map.go generated vendored
View file

@ -13,7 +13,6 @@ import (
"unicode"
"unicode/utf8"
"golang.org/x/text/internal"
"golang.org/x/text/language"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
@ -25,11 +24,6 @@ import (
// dst so far won't need changing as we see more source bytes.
type mapFunc func(*context) bool
// A spanFunc takes a context set to the current rune and returns whether this
// rune would be altered when written to the output. It may advance the context
// to the next rune. It returns whether a checkpoint is possible.
type spanFunc func(*context) bool
// maxIgnorable defines the maximum number of ignorables to consider for
// lookahead operations.
const maxIgnorable = 30
@ -42,12 +36,12 @@ func init() {
for _, s := range strings.Split(supported, " ") {
tags = append(tags, language.MustParse(s))
}
matcher = internal.NewInheritanceMatcher(tags)
matcher = language.NewMatcher(tags)
Supported = language.NewCoverage(tags)
}
var (
matcher *internal.InheritanceMatcher
matcher language.Matcher
Supported language.Coverage
@ -56,69 +50,56 @@ var (
// Some uppercase mappers are stateless, so we can precompute the
// Transformers and save a bit on runtime allocations.
upperFunc = []struct {
upper mapFunc
span spanFunc
}{
{nil, nil}, // und
{nil, nil}, // af
{aztrUpper(upper), isUpper}, // az
{elUpper, noSpan}, // el
{ltUpper(upper), noSpan}, // lt
{nil, nil}, // nl
{aztrUpper(upper), isUpper}, // tr
upperFunc = []mapFunc{
nil, // und
nil, // af
aztrUpper(upper), // az
elUpper, // el
ltUpper(upper), // lt
nil, // nl
aztrUpper(upper), // tr
}
undUpper transform.SpanningTransformer = &undUpperCaser{}
undLower transform.SpanningTransformer = &undLowerCaser{}
undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}
undUpper transform.Transformer = &undUpperCaser{}
lowerFunc = []mapFunc{
nil, // und
nil, // af
lower, // und
lower, // af
aztrLower, // az
nil, // el
lower, // el
ltLower, // lt
nil, // nl
lower, // nl
aztrLower, // tr
}
titleInfos = []struct {
title mapFunc
lower mapFunc
titleSpan spanFunc
rewrite func(*context)
title, lower mapFunc
rewrite func(*context)
}{
{title, lower, isTitle, nil}, // und
{title, lower, isTitle, afnlRewrite}, // af
{aztrUpper(title), aztrLower, isTitle, nil}, // az
{title, lower, isTitle, nil}, // el
{ltUpper(title), ltLower, noSpan, nil}, // lt
{nlTitle, lower, nlTitleSpan, afnlRewrite}, // nl
{aztrUpper(title), aztrLower, isTitle, nil}, // tr
{title, lower, nil}, // und
{title, lower, afnlRewrite}, // af
{aztrUpper(title), aztrLower, nil}, // az
{title, lower, nil}, // el
{ltUpper(title), ltLower, nil}, // lt
{nlTitle, lower, afnlRewrite}, // nl
{aztrUpper(title), aztrLower, nil}, // tr
}
)
func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
func makeUpper(t language.Tag, o options) transform.Transformer {
_, i, _ := matcher.Match(t)
f := upperFunc[i].upper
f := upperFunc[i]
if f == nil {
return undUpper
}
return &simpleCaser{f: f, span: upperFunc[i].span}
return &simpleCaser{f: f}
}
func makeLower(t language.Tag, o options) transform.SpanningTransformer {
func makeLower(t language.Tag, o options) transform.Transformer {
_, i, _ := matcher.Match(t)
f := lowerFunc[i]
if f == nil {
if o.ignoreFinalSigma {
return undLowerIgnoreSigma
}
return undLower
}
if o.ignoreFinalSigma {
return &simpleCaser{f: f, span: isLower}
if o.noFinalSigma {
return &simpleCaser{f: f}
}
return &lowerCaser{
first: f,
@ -126,28 +107,22 @@ func makeLower(t language.Tag, o options) transform.SpanningTransformer {
}
}
func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
func makeTitle(t language.Tag, o options) transform.Transformer {
_, i, _ := matcher.Match(t)
x := &titleInfos[i]
lower := x.lower
if o.noLower {
lower = (*context).copy
} else if !o.ignoreFinalSigma {
} else if !o.noFinalSigma {
lower = finalSigma(lower)
}
return &titleCaser{
title: x.title,
lower: lower,
titleSpan: x.titleSpan,
rewrite: x.rewrite,
title: x.title,
lower: lower,
rewrite: x.rewrite,
}
}
func noSpan(c *context) bool {
c.err = transform.ErrEndOfSpan
return false
}
// TODO: consider a similar special case for the fast majority lower case. This
// is a bit more involved so will require some more precise benchmarking to
// justify it.
@ -157,7 +132,7 @@ type undUpperCaser struct{ transform.NopResetter }
// undUpperCaser implements the Transformer interface for doing an upper case
// mapping for the root locale (und). It eliminates the need for an allocation
// as it prevents escaping by not using function pointers.
func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
func (t *undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for c.next() {
upper(&c)
@ -166,117 +141,26 @@ func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, e
return c.ret()
}
func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && isUpper(&c) {
c.checkpoint()
}
return c.retSpan()
}
// undLowerIgnoreSigmaCaser implements the Transformer interface for doing
// a lower case mapping for the root locale (und) ignoring final sigma
// handling. This casing algorithm is used in some performance-critical packages
// like secure/precis and x/net/http/idna, which warrants its special-casing.
type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }
func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for c.next() && lower(&c) {
c.checkpoint()
}
return c.ret()
}
// Span implements a generic lower-casing. This is possible as isLower works
// for all lowercasing variants. All lowercase variants only vary in how they
// transform a non-lowercase letter. They will never change an already lowercase
// letter. In addition, there is no state.
func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && isLower(&c) {
c.checkpoint()
}
return c.retSpan()
}
type simpleCaser struct {
context
f mapFunc
span spanFunc
f mapFunc
}
// simpleCaser implements the Transformer interface for doing a case operation
// on a rune-by-rune basis.
func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for c.next() && t.f(&c) {
t.context = context{dst: dst, src: src, atEOF: atEOF}
c := &t.context
for c.next() && t.f(c) {
c.checkpoint()
}
return c.ret()
}
func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && t.span(&c) {
c.checkpoint()
}
return c.retSpan()
}
// undLowerCaser implements the Transformer interface for doing a lower case
// mapping for the root locale (und) ignoring final sigma handling. This casing
// algorithm is used in some performance-critical packages like secure/precis
// and x/net/http/idna, which warrants its special-casing.
type undLowerCaser struct{ transform.NopResetter }
func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for isInterWord := true; c.next(); {
if isInterWord {
if c.info.isCased() {
if !lower(&c) {
break
}
isInterWord = false
} else if !c.copy() {
break
}
} else {
if c.info.isNotCasedAndNotCaseIgnorable() {
if !c.copy() {
break
}
isInterWord = true
} else if !c.hasPrefix("Σ") {
if !lower(&c) {
break
}
} else if !finalSigmaBody(&c) {
break
}
}
c.checkpoint()
}
return c.ret()
}
func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
c := context{src: src, atEOF: atEOF}
for c.next() && isLower(&c) {
c.checkpoint()
}
return c.retSpan()
}
// lowerCaser implements the Transformer interface. The default Unicode lower
// casing requires different treatment for the first and subsequent characters
// of a word, most notably to handle the Greek final Sigma.
type lowerCaser struct {
undLowerIgnoreSigmaCaser
context
first, midWord mapFunc
@ -318,9 +202,7 @@ type titleCaser struct {
context
// rune mappings used by the actual casing algorithms.
title mapFunc
lower mapFunc
titleSpan spanFunc
title, lower mapFunc
rewrite func(*context)
}
@ -346,10 +228,10 @@ func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err
t.rewrite(c)
}
wasMid := p.isMid()
wasMid := p.isCaseIgnorableAndNonBreakStarter()
// Break out of this loop on failure to ensure we do not modify the
// state incorrectly.
if p.isCased() {
if p.isCased() && !p.isCaseIgnorableAndNotCased() {
if !c.isMidWord {
if !t.title(c) {
break
@ -360,139 +242,71 @@ func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err
}
} else if !c.copy() {
break
} else if p.isBreak() {
}
// TODO: make this an "else if" if we can prove that no rune that does
// not match the first condition of the if statement can be a break.
if p.isBreak() {
c.isMidWord = false
}
// As we save the state of the transformer, it is safe to call
// checkpoint after any successful write.
if !(c.isMidWord && wasMid) {
c.checkpoint()
}
c.checkpoint()
if !c.next() {
break
}
if wasMid && c.info.isMid() {
if wasMid && c.info.isCaseIgnorableAndNonBreakStarter() {
c.isMidWord = false
}
}
return c.ret()
}
func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
c := &t.context
if !c.next() {
return c.retSpan()
}
for {
p := c.info
if t.rewrite != nil {
t.rewrite(c)
}
wasMid := p.isMid()
// Break out of this loop on failure to ensure we do not modify the
// state incorrectly.
if p.isCased() {
if !c.isMidWord {
if !t.titleSpan(c) {
break
}
c.isMidWord = true
} else if !isLower(c) {
break
}
} else if p.isBreak() {
c.isMidWord = false
}
// As we save the state of the transformer, it is safe to call
// checkpoint after any successful write.
if !(c.isMidWord && wasMid) {
c.checkpoint()
}
if !c.next() {
break
}
if wasMid && c.info.isMid() {
c.isMidWord = false
}
}
return c.retSpan()
}
// finalSigma adds Greek final Sigma handing to another casing function. It
// determines whether a lowercased sigma should be σ or ς, by looking ahead for
// case-ignorables and a cased letters.
func finalSigma(f mapFunc) mapFunc {
return func(c *context) bool {
// ::NFD();
// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
// Σ } [:case-ignorable:]* [:cased:] → σ;
// [:cased:] [:case-ignorable:]* { Σ → ς;
// ::Any-Lower;
// ::NFC();
if !c.hasPrefix("Σ") {
return f(c)
}
return finalSigmaBody(c)
}
}
func finalSigmaBody(c *context) bool {
// Current rune must be ∑.
// ::NFD();
// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
// Σ } [:case-ignorable:]* [:cased:] → σ;
// [:cased:] [:case-ignorable:]* { Σ → ς;
// ::Any-Lower;
// ::NFC();
p := c.pDst
c.writeString("ς")
// TODO: we should do this here, but right now this will never have an
// effect as this is called when the prefix is Sigma, whereas Dutch and
// Afrikaans only test for an apostrophe.
//
// if t.rewrite != nil {
// t.rewrite(c)
// }
// We need to do one more iteration after maxIgnorable, as a cased
// letter is not an ignorable and may modify the result.
wasMid := false
for i := 0; i < maxIgnorable+1; i++ {
if !c.next() {
return false
}
if !c.info.isCaseIgnorable() {
// All Midword runes are also case ignorable, so we are
// guaranteed to have a letter or word break here. As we are
// unreading the run, there is no need to unset c.isMidWord;
// the title caser will handle this.
if c.info.isCased() {
// p+1 is guaranteed to be in bounds: if writing ς was
// successful, p+1 will contain the second byte of ς. If not,
// this function will have returned after c.next returned false.
c.dst[p+1]++ // ς → σ
p := c.pDst
c.writeString("ς")
// We need to do one more iteration after maxIgnorable, as a cased
// letter is not an ignorable and may modify the result.
for i := 0; i < maxIgnorable+1; i++ {
if !c.next() {
return false
}
c.unreadRune()
return true
if !c.info.isCaseIgnorable() {
if c.info.isCased() {
// p+1 is guaranteed to be in bounds: if writing ς was
// successful, p+1 will contain the second byte of ς. If not,
// this function will have returned after c.next returned false.
c.dst[p+1]++ // ς → σ
}
c.unreadRune()
return true
}
// A case ignorable may also introduce a word break, so we may need
// to continue searching even after detecting a break.
c.isMidWord = c.isMidWord && !c.info.isBreak()
c.copy()
}
// A case ignorable may also introduce a word break, so we may need
// to continue searching even after detecting a break.
isMid := c.info.isMid()
if (wasMid && isMid) || c.info.isBreak() {
c.isMidWord = false
}
wasMid = isMid
c.copy()
return true
}
return true
}
// finalSigmaSpan would be the same as isLower.
// elUpper implements Greek upper casing, which entails removing a predefined
// set of non-blocked modifiers. Note that these accents should not be removed
// for title casing!
@ -562,8 +376,6 @@ func elUpper(c *context) bool {
return i == maxIgnorable
}
// TODO: implement elUpperSpan (low-priority: complex and infrequent).
func ltLower(c *context) bool {
// From CLDR:
// # Introduce an explicit dot above when lowercasing capital I's and J's
@ -578,10 +390,10 @@ func ltLower(c *context) bool {
// ::NFD();
// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
// I \u0300 (Ì) → i \u0307 \u0300;
// I \u0301 (Í) → i \u0307 \u0301;
// I \u0303 (Ĩ) → i \u0307 \u0303;
// Į } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → į \u0307;
// Ì → i \u0307 \u0300;
// Í → i \u0307 \u0301;
// Ĩ → i \u0307 \u0303;
// ::Any-Lower();
// ::NFC();
@ -633,16 +445,9 @@ func ltLower(c *context) bool {
return i == maxIgnorable
}
// ltLowerSpan would be the same as isLower.
func ltUpper(f mapFunc) mapFunc {
return func(c *context) bool {
// Unicode:
// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
//
// From CLDR:
// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible
// # intervening non-230 marks.
// ::NFD();
// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
// ::Any-Upper();
@ -706,8 +511,6 @@ func ltUpper(f mapFunc) mapFunc {
}
}
// TODO: implement ltUpperSpan (low priority: complex and infrequent).
func aztrUpper(f mapFunc) mapFunc {
return func(c *context) bool {
// i→İ;
@ -768,8 +571,6 @@ Loop:
return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
}
// aztrLowerSpan would be the same as isLower.
func nlTitle(c *context) bool {
// From CLDR:
// # Special titlecasing for Dutch initial "ij".
@ -790,24 +591,6 @@ func nlTitle(c *context) bool {
return true
}
func nlTitleSpan(c *context) bool {
// From CLDR:
// # Special titlecasing for Dutch initial "ij".
// ::Any-Title();
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
if c.src[c.pSrc] != 'I' {
return isTitle(c)
}
if !c.next() || c.src[c.pSrc] == 'j' {
return false
}
if c.src[c.pSrc] != 'J' {
c.unreadRune()
}
return true
}
// Not part of CLDR, but see http://unicode.org/cldr/trac/ticket/7078.
func afnlRewrite(c *context) {
if c.hasPrefix("'") || c.hasPrefix("") {

File diff suppressed because it is too large Load diff

View file

@ -22,7 +22,6 @@ package cases
// Only 13..8 are used for XOR patterns.
// 7 inverseFold (fold to upper, not to lower)
// 6 index: interpret the XOR pattern as an index
// or isMid if case mode is cIgnorableUncased.
// 5..4 CCC: zero (normal or break), above or other
// }
// 3 exception: interpret this value as an exception index
@ -45,7 +44,6 @@ const (
ignorableValue = 0x0004
inverseFoldBit = 1 << 7
isMidBit = 1 << 6
exceptionBit = 1 << 3
exceptionShift = 5
@ -55,7 +53,7 @@ const (
xorShift = 8
// There is no mapping if all xor bits and the exception bit are zero.
hasMappingMask = 0xff80 | exceptionBit
hasMappingMask = 0xffc0 | exceptionBit
)
// The case mode bits encodes the case type of a rune. This includes uncased,
@ -93,6 +91,10 @@ func (c info) isCaseIgnorable() bool {
return c&ignorableMask == ignorableValue
}
func (c info) isCaseIgnorableAndNonBreakStarter() bool {
return c&(fullCasedMask|cccMask) == (ignorableValue | cccZero)
}
func (c info) isNotCasedAndNotCaseIgnorable() bool {
return c&fullCasedMask == 0
}
@ -101,10 +103,6 @@ func (c info) isCaseIgnorableAndNotCased() bool {
return c&fullCasedMask == cIgnorableUncased
}
func (c info) isMid() bool {
return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased
}
// The case mapping implementation will need to know about various Canonical
// Combining Class (CCC) values. We encode two of these in the trie value:
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that

77
vendor/golang.org/x/text/gen.go generated vendored
View file

@ -12,8 +12,6 @@ import (
"bytes"
"flag"
"fmt"
"go/build"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
@ -28,7 +26,6 @@ import (
var (
verbose = flag.Bool("v", false, "verbose output")
force = flag.Bool("force", false, "ignore failing dependencies")
doCore = flag.Bool("core", false, "force an update to core")
excludeList = flag.String("exclude", "",
"comma-separated list of packages to exclude")
@ -64,7 +61,6 @@ func main() {
// variable. This will prevent duplicate downloads and also will enable long
// tests, which really need to be run after each generated package.
updateCore := *doCore
if gen.UnicodeVersion() != unicode.Version {
fmt.Printf("Requested Unicode version %s; core unicode version is %s.\n",
gen.UnicodeVersion(),
@ -76,44 +72,24 @@ func main() {
if gen.UnicodeVersion() < unicode.Version && !*force {
os.Exit(2)
}
updateCore = true
}
var unicode = &dependency{}
if updateCore {
fmt.Printf("Updating core to version %s...\n", gen.UnicodeVersion())
unicode = generate("unicode")
// Test some users of the unicode packages, especially the ones that
// keep a mirrored table. These may need to be corrected by hand.
generate("regexp", unicode)
generate("strconv", unicode) // mimics Unicode table
generate("strings", unicode)
generate("testing", unicode) // mimics Unicode table
}
var (
cldr = generate("./unicode/cldr", unicode)
language = generate("./language", cldr)
internal = generate("./internal", unicode, language)
norm = generate("./unicode/norm", unicode)
rangetable = generate("./unicode/rangetable", unicode)
cases = generate("./cases", unicode, norm, language, rangetable)
width = generate("./width", unicode)
bidi = generate("./unicode/bidi", unicode, norm, rangetable)
_ = generate("./secure/precis", unicode, norm, rangetable, cases, width, bidi)
_ = generate("./encoding/htmlindex", unicode, language)
_ = generate("./currency", unicode, cldr, language, internal)
_ = generate("./internal/number", unicode, cldr, language, internal)
_ = generate("./language/display", unicode, cldr, language, internal)
_ = generate("./collate", unicode, norm, cldr, language, rangetable)
_ = generate("./search", unicode, norm, cldr, language, rangetable)
cldr = generate("unicode/cldr")
language = generate("language", cldr)
internal = generate("internal", language)
norm = generate("unicode/norm")
rangetable = generate("unicode/rangetable")
cases = generate("cases", norm, language, rangetable)
width = generate("width")
bidi = generate("unicode/bidi", norm, rangetable)
_ = generate("secure/precis", norm, rangetable, cases, width, bidi)
_ = generate("encoding/htmlindex", language)
_ = generate("currency", cldr, language, internal)
_ = generate("internal/number", cldr, language, internal)
_ = generate("language/display", cldr, language)
_ = generate("collate", norm, cldr, language, rangetable)
_ = generate("search", norm, cldr, language, rangetable)
)
if updateCore {
copyVendored()
generate("vendor/golang_org/x/net/idna", unicode, norm, width, cases)
}
all.Wait()
if hasErrors {
@ -157,7 +133,7 @@ func generate(pkg string, deps ...*dependency) *dependency {
if *verbose {
args = append(args, "-v")
}
args = append(args, pkg)
args = append(args, "./"+pkg)
cmd := exec.Command(filepath.Join(runtime.GOROOT(), "bin", "go"), args...)
w := &bytes.Buffer{}
cmd.Stderr = w
@ -187,27 +163,6 @@ func generate(pkg string, deps ...*dependency) *dependency {
return &wg
}
func copyVendored() {
// Copy the vendored files. Some more may need to be copied in by hand.
dir := filepath.Join(build.Default.GOROOT, "src/vendor/golang_org/x/text")
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if info.IsDir() {
return nil
}
b, err := ioutil.ReadFile(path[len(dir)+1:])
if err != nil {
return err
}
vprintf("=== COPY %s\n", path)
b = bytes.Replace(b, []byte("golang.org"), []byte("golang_org"), -1)
return ioutil.WriteFile(path, b, 0666)
})
if err != nil {
fmt.Println("Copying vendored files failed:", err)
os.Exit(1)
}
}
func contains(a []string, s string) bool {
for _, e := range a {
if s == e {

View file

@ -1,52 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"log"
"golang.org/x/text/internal/gen"
"golang.org/x/text/language"
"golang.org/x/text/unicode/cldr"
)
func main() {
r := gen.OpenCLDRCoreZip()
defer r.Close()
d := &cldr.Decoder{}
data, err := d.DecodeZip(r)
if err != nil {
log.Fatalf("DecodeZip: %v", err)
}
w := gen.NewCodeWriter()
defer w.WriteGoFile("tables.go", "internal")
// Create parents table.
parents := make([]uint16, language.NumCompactTags)
for _, loc := range data.Locales() {
tag := language.MustParse(loc)
index, ok := language.CompactIndex(tag)
if !ok {
continue
}
parentIndex := 0 // und
for p := tag.Parent(); p != language.Und; p = p.Parent() {
if x, ok := language.CompactIndex(p); ok {
parentIndex = x
break
}
}
parents[index] = uint16(parentIndex)
}
w.WriteComment(`
Parent maps a compact index of a tag to the compact index of the parent of
this tag.`)
w.WriteVar("Parent", parents)
}

View file

@ -1,51 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go
// Package internal contains non-exported functionality that are used by
// packages in the text repository.
package internal // import "golang.org/x/text/internal"
import (
"sort"
"golang.org/x/text/language"
)
// SortTags sorts tags in place.
func SortTags(tags []language.Tag) {
sort.Sort(sorter(tags))
}
type sorter []language.Tag
func (s sorter) Len() int {
return len(s)
}
func (s sorter) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
func (s sorter) Less(i, j int) bool {
return s[i].String() < s[j].String()
}
// UniqueTags sorts and filters duplicate tags in place and returns a slice with
// only unique tags.
func UniqueTags(tags []language.Tag) []language.Tag {
if len(tags) <= 1 {
return tags
}
SortTags(tags)
k := 0
for i := 1; i < len(tags); i++ {
if tags[k].String() < tags[i].String() {
k++
tags[k] = tags[i]
}
}
return tags[:k+1]
}

View file

@ -1,67 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package internal
// This file contains matchers that implement CLDR inheritance.
//
// See http://unicode.org/reports/tr35/#Locale_Inheritance.
//
// Some of the inheritance described in this document is already handled by
// the cldr package.
import (
"golang.org/x/text/language"
)
// TODO: consider if (some of the) matching algorithm needs to be public after
// getting some feel about what is generic and what is specific.
// NewInheritanceMatcher returns a matcher that matches based on the inheritance
// chain.
//
// The matcher uses canonicalization and the parent relationship to find a
// match. The resulting match will always be either Und or a language with the
// same language and script as the requested language. It will not match
// languages for which there is understood to be mutual or one-directional
// intelligibility.
//
// A Match will indicate an Exact match if the language matches after
// canonicalization and High if the matched tag is a parent.
func NewInheritanceMatcher(t []language.Tag) *InheritanceMatcher {
tags := &InheritanceMatcher{make(map[language.Tag]int)}
for i, tag := range t {
ct, err := language.All.Canonicalize(tag)
if err != nil {
ct = tag
}
tags.index[ct] = i
}
return tags
}
type InheritanceMatcher struct {
index map[language.Tag]int
}
func (m InheritanceMatcher) Match(want ...language.Tag) (language.Tag, int, language.Confidence) {
for _, t := range want {
ct, err := language.All.Canonicalize(t)
if err != nil {
ct = t
}
conf := language.Exact
for {
if index, ok := m.index[ct]; ok {
return ct, index, conf
}
if ct == language.Und {
break
}
ct = ct.Parent()
conf = language.High
}
}
return language.Und, 0, language.No
}

View file

@ -1,116 +0,0 @@
// This file was generated by go generate; DO NOT EDIT
package internal
// Parent maps a compact index of a tag to the compact index of the parent of
// this tag.
var Parent = []uint16{ // 752 elements
// Entry 0 - 3F
0x0000, 0x0053, 0x00e5, 0x0000, 0x0003, 0x0003, 0x0000, 0x0006,
0x0000, 0x0008, 0x0000, 0x000a, 0x0000, 0x000c, 0x000c, 0x000c,
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
0x000c, 0x0000, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x002e,
0x0000, 0x0000, 0x0031, 0x0030, 0x0030, 0x0000, 0x0035, 0x0000,
0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x003d, 0x0000,
// Entry 40 - 7F
0x0000, 0x0040, 0x0000, 0x0042, 0x0042, 0x0000, 0x0045, 0x0045,
0x0000, 0x0048, 0x0000, 0x004a, 0x0000, 0x0000, 0x004d, 0x004c,
0x004c, 0x0000, 0x0051, 0x0051, 0x0051, 0x0051, 0x0000, 0x0056,
0x0000, 0x0058, 0x0000, 0x005a, 0x0000, 0x005c, 0x005c, 0x0000,
0x005f, 0x0000, 0x0061, 0x0000, 0x0063, 0x0000, 0x0065, 0x0065,
0x0000, 0x0068, 0x0000, 0x006a, 0x006a, 0x006a, 0x006a, 0x006a,
0x006a, 0x006a, 0x0000, 0x0072, 0x0000, 0x0074, 0x0000, 0x0076,
0x0000, 0x0000, 0x0079, 0x0000, 0x007b, 0x0000, 0x007d, 0x0000,
// Entry 80 - BF
0x007f, 0x007f, 0x0000, 0x0082, 0x0082, 0x0000, 0x0085, 0x0086,
0x0086, 0x0086, 0x0085, 0x0087, 0x0086, 0x0086, 0x0086, 0x0085,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087, 0x0086,
0x0086, 0x0086, 0x0086, 0x0087, 0x0086, 0x0087, 0x0086, 0x0086,
0x0087, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0085, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0086, 0x0086, 0x0085, 0x0086, 0x0085, 0x0086,
// Entry C0 - FF
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0085,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087, 0x0086, 0x0086,
0x0087, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0086, 0x0086, 0x0085, 0x0085, 0x0086, 0x0086,
0x0085, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0000, 0x00ee,
0x0000, 0x00f0, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1,
0x00f1, 0x00f1, 0x00f0, 0x00f1, 0x00f0, 0x00f0, 0x00f1, 0x00f1,
// Entry 100 - 13F
0x00f0, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f0, 0x00f1, 0x00f1,
0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x0000, 0x010c, 0x0000, 0x010e,
0x0000, 0x0110, 0x0000, 0x0112, 0x0112, 0x0000, 0x0115, 0x0115,
0x0115, 0x0115, 0x0000, 0x011a, 0x0000, 0x011c, 0x0000, 0x011e,
0x011e, 0x0000, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121,
0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121,
0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121,
0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121,
// Entry 140 - 17F
0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121,
0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121,
0x0000, 0x0150, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
0x0000, 0x0158, 0x0000, 0x015a, 0x015a, 0x015a, 0x0000, 0x015e,
0x0000, 0x0000, 0x0161, 0x0000, 0x0163, 0x0000, 0x0165, 0x0165,
0x0165, 0x0000, 0x0169, 0x0000, 0x016b, 0x0000, 0x016d, 0x0000,
0x016f, 0x016f, 0x0000, 0x0172, 0x0000, 0x0174, 0x0000, 0x0176,
0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
// Entry 180 - 1BF
0x0000, 0x0180, 0x0180, 0x0180, 0x0000, 0x0000, 0x0185, 0x0000,
0x0000, 0x0188, 0x0000, 0x018a, 0x0000, 0x0000, 0x018d, 0x0000,
0x018f, 0x0000, 0x0000, 0x0192, 0x0000, 0x0000, 0x0195, 0x0000,
0x0197, 0x0000, 0x0199, 0x0000, 0x019b, 0x0000, 0x019d, 0x0000,
0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
0x01a7, 0x0000, 0x01a9, 0x01a9, 0x0000, 0x01ac, 0x0000, 0x01ae,
0x0000, 0x01b0, 0x0000, 0x01b2, 0x0000, 0x01b4, 0x0000, 0x0000,
0x01b7, 0x0000, 0x01b9, 0x0000, 0x01bb, 0x0000, 0x01bd, 0x0000,
// Entry 1C0 - 1FF
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x01c3, 0x01c3, 0x01c3,
0x0000, 0x01c8, 0x0000, 0x01ca, 0x01ca, 0x0000, 0x01cd, 0x0000,
0x01cf, 0x0000, 0x01d1, 0x0000, 0x01d3, 0x0000, 0x01d5, 0x0000,
0x01d7, 0x01d7, 0x0000, 0x01da, 0x0000, 0x01dc, 0x0000, 0x01de,
0x0000, 0x01e0, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x01ec, 0x01ec,
0x0000, 0x01f0, 0x0000, 0x01f2, 0x0000, 0x01f4, 0x0000, 0x01f6,
0x0000, 0x0000, 0x01f9, 0x0000, 0x01fb, 0x01fb, 0x0000, 0x01fe,
// Entry 200 - 23F
0x0000, 0x0200, 0x0200, 0x0000, 0x0203, 0x0203, 0x0000, 0x0206,
0x0206, 0x0206, 0x0206, 0x0206, 0x0206, 0x0206, 0x0000, 0x020e,
0x0000, 0x0210, 0x0000, 0x0212, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0218, 0x0000, 0x0000, 0x021b, 0x0000, 0x021d, 0x021d,
0x0000, 0x0220, 0x0000, 0x0222, 0x0222, 0x0000, 0x0000, 0x0226,
0x0225, 0x0225, 0x0000, 0x0000, 0x022b, 0x0000, 0x022d, 0x0000,
0x022f, 0x0000, 0x023b, 0x0231, 0x023b, 0x023b, 0x023b, 0x023b,
0x023b, 0x023b, 0x023b, 0x0231, 0x023b, 0x023b, 0x0000, 0x023e,
// Entry 240 - 27F
0x023e, 0x023e, 0x0000, 0x0242, 0x0000, 0x0244, 0x0000, 0x0246,
0x0246, 0x0000, 0x0249, 0x0000, 0x024b, 0x024b, 0x024b, 0x024b,
0x024b, 0x024b, 0x0000, 0x0252, 0x0000, 0x0254, 0x0000, 0x0256,
0x0000, 0x0258, 0x0000, 0x025a, 0x0000, 0x0000, 0x025d, 0x025d,
0x025d, 0x0000, 0x0261, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
0x0000, 0x0268, 0x0267, 0x0267, 0x0000, 0x026c, 0x0000, 0x026e,
0x0000, 0x0270, 0x0000, 0x0000, 0x0000, 0x0000, 0x0275, 0x0000,
0x0000, 0x0278, 0x0000, 0x027a, 0x027a, 0x027a, 0x027a, 0x0000,
// Entry 280 - 2BF
0x027f, 0x027f, 0x027f, 0x0000, 0x0283, 0x0283, 0x0283, 0x0283,
0x0283, 0x0000, 0x0289, 0x0289, 0x0289, 0x0289, 0x0000, 0x0000,
0x0000, 0x0000, 0x0291, 0x0291, 0x0291, 0x0000, 0x0295, 0x0295,
0x0295, 0x0295, 0x0000, 0x0000, 0x029b, 0x029b, 0x029b, 0x029b,
0x0000, 0x02a0, 0x0000, 0x02a2, 0x02a2, 0x0000, 0x02a5, 0x0000,
0x02a7, 0x02a7, 0x0000, 0x0000, 0x02ab, 0x0000, 0x0000, 0x02ae,
0x0000, 0x02b0, 0x02b0, 0x0000, 0x0000, 0x02b4, 0x0000, 0x02b6,
0x0000, 0x02b8, 0x0000, 0x02ba, 0x0000, 0x02bc, 0x02bc, 0x0000,
// Entry 2C0 - 2FF
0x0000, 0x02c0, 0x0000, 0x02c2, 0x02bf, 0x02bf, 0x0000, 0x0000,
0x02c7, 0x02c6, 0x02c6, 0x0000, 0x0000, 0x02cc, 0x0000, 0x02ce,
0x0000, 0x02d0, 0x0000, 0x0000, 0x02d3, 0x0000, 0x0000, 0x0000,
0x02d7, 0x0000, 0x02d9, 0x0000, 0x02db, 0x0000, 0x02dd, 0x02dd,
0x0000, 0x02e0, 0x0000, 0x02e2, 0x0000, 0x02e4, 0x02e4, 0x02e4,
0x02e4, 0x02e4, 0x0000, 0x02ea, 0x02eb, 0x02ea, 0x0000, 0x02ee,
} // Size: 1528 bytes
// Total table size 1528 bytes (1KiB); checksum: B99CF952

File diff suppressed because it is too large Load diff

View file

@ -593,7 +593,7 @@ func (t Tag) Extension(x byte) (ext Extension, ok bool) {
return Extension{ext}, true
}
}
return Extension{}, false
return Extension{string(x)}, false
}
// Extensions returns all extensions of t.

View file

@ -678,8 +678,6 @@ func (b *builder) parseIndices() {
b.locale.parse(meta.DefaultContent.Locales)
}
// TODO: region inclusion data will probably not be use used in future matchers.
func (b *builder) computeRegionGroups() {
b.groups = make(map[int]index)
@ -688,11 +686,6 @@ func (b *builder) computeRegionGroups() {
b.groups[i] = index(len(b.groups))
}
for _, g := range b.supp.TerritoryContainment.Group {
// Skip UN and EURO zone as they are flattening the containment
// relationship.
if g.Type == "EZ" || g.Type == "UN" {
continue
}
group := b.region.index(g.Type)
if _, ok := b.groups[group]; !ok {
b.groups[group] = index(len(b.groups))
@ -789,7 +782,6 @@ func (b *builder) writeLanguage() {
lang.updateLater("tw", "twi")
lang.updateLater("nb", "nob")
lang.updateLater("ak", "aka")
lang.updateLater("bh", "bih")
// Ensure that each 2-letter code is matched with a 3-letter code.
for _, v := range lang.s[1:] {
@ -1490,11 +1482,6 @@ func (b *builder) writeRegionInclusionData() {
containment = make(map[index][]index)
)
for _, g := range b.supp.TerritoryContainment.Group {
// Skip UN and EURO zone as they are flattening the containment
// relationship.
if g.Type == "EZ" || g.Type == "UN" {
continue
}
group := b.region.index(g.Type)
groupIdx := b.groups[group]
for _, mem := range strings.Split(g.Contains, " ") {

View file

@ -396,8 +396,8 @@ type matcher struct {
// matchHeader has the lists of tags for exact matches and matches based on
// maximized and canonicalized tags for a given language.
type matchHeader struct {
exact []*haveTag
max []*haveTag
exact []haveTag
max []haveTag
}
// haveTag holds a supported Tag and its maximized script and region. The maximized
@ -457,7 +457,7 @@ func (h *matchHeader) addIfNew(n haveTag, exact bool) {
}
}
if exact {
h.exact = append(h.exact, &n)
h.exact = append(h.exact, n)
}
// Allow duplicate maximized tags, but create a linked list to allow quickly
// comparing the equivalents and bail out.
@ -472,7 +472,7 @@ func (h *matchHeader) addIfNew(n haveTag, exact bool) {
break
}
}
h.max = append(h.max, &n)
h.max = append(h.max, n)
}
// header returns the matchHeader for the given language. It creates one if
@ -503,7 +503,7 @@ func newMatcher(supported []Tag) *matcher {
pair, _ := makeHaveTag(tag, i)
m.header(tag.lang).addIfNew(pair, true)
}
m.default_ = m.header(supported[0].lang).exact[0]
m.default_ = &m.header(supported[0].lang).exact[0]
for i, tag := range supported {
pair, max := makeHaveTag(tag, i)
if max != tag.lang {
@ -520,8 +520,7 @@ func newMatcher(supported []Tag) *matcher {
return
}
hw := m.header(langID(want))
for _, ht := range hh.max {
v := *ht
for _, v := range hh.max {
if conf < v.conf {
v.conf = conf
}
@ -581,7 +580,7 @@ func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
continue
}
for i := range h.exact {
have := h.exact[i]
have := &h.exact[i]
if have.tag.equalsRest(w) {
return have, w, Exact
}
@ -592,7 +591,7 @@ func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
// Base language is not defined.
if h != nil {
for i := range h.exact {
have := h.exact[i]
have := &h.exact[i]
if have.tag.equalsRest(w) {
return have, w, Exact
}
@ -610,11 +609,11 @@ func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
}
// Check for match based on maximized tag.
for i := range h.max {
have := h.max[i]
have := &h.max[i]
best.update(have, w, max.script, max.region)
if best.conf == Exact {
for have.nextMax != 0 {
have = h.max[have.nextMax]
have = &h.max[have.nextMax]
best.update(have, w, max.script, max.region)
}
return best.have, best.want, High

File diff suppressed because it is too large Load diff

View file

@ -41,35 +41,20 @@ func If(s Set, tIn, tNotIn transform.Transformer) Transformer {
if tNotIn == nil {
tNotIn = transform.Nop
}
sIn, ok := tIn.(transform.SpanningTransformer)
if !ok {
sIn = dummySpan{tIn}
}
sNotIn, ok := tNotIn.(transform.SpanningTransformer)
if !ok {
sNotIn = dummySpan{tNotIn}
}
a := &cond{
tIn: sIn,
tNotIn: sNotIn,
tIn: tIn,
tNotIn: tNotIn,
f: s.Contains,
}
a.Reset()
return Transformer{a}
}
type dummySpan struct{ transform.Transformer }
func (d dummySpan) Span(src []byte, atEOF bool) (n int, err error) {
return 0, transform.ErrEndOfSpan
}
type cond struct {
tIn, tNotIn transform.SpanningTransformer
tIn, tNotIn transform.Transformer
f func(rune) bool
check func(rune) bool // current check to perform
t transform.SpanningTransformer // current transformer to use
check func(rune) bool // current check to perform
t transform.Transformer // current transformer to use
}
// Reset implements transform.Transformer.
@ -99,51 +84,6 @@ func (t *cond) isNot(r rune) bool {
return false
}
// This implementation of Span doesn't help all too much, but it needs to be
// there to satisfy this package's Transformer interface.
// TODO: there are certainly room for improvements, though. For example, if
// t.t == transform.Nop (which will a common occurrence) it will save a bundle
// to special-case that loop.
func (t *cond) Span(src []byte, atEOF bool) (n int, err error) {
p := 0
for n < len(src) && err == nil {
// Don't process too much at a time as the Spanner that will be
// called on this block may terminate early.
const maxChunk = 4096
max := len(src)
if v := n + maxChunk; v < max {
max = v
}
atEnd := false
size := 0
current := t.t
for ; p < max; p += size {
r := rune(src[p])
if r < utf8.RuneSelf {
size = 1
} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
if !atEOF && !utf8.FullRune(src[p:]) {
err = transform.ErrShortSrc
break
}
}
if !t.check(r) {
// The next rune will be the start of a new run.
atEnd = true
break
}
}
n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src)))
n += n2
if err2 != nil {
return n, err2
}
// At this point either err != nil or t.check will pass for the rune at p.
p = n + size
}
return n, err
}
func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
p := 0
for nSrc < len(src) && err == nil {
@ -159,10 +99,9 @@ func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error
size := 0
current := t.t
for ; p < max; p += size {
r := rune(src[p])
if r < utf8.RuneSelf {
size = 1
} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
var r rune
r, size = utf8.DecodeRune(src[p:])
if r == utf8.RuneError && size == 1 {
if !atEOF && !utf8.FullRune(src[p:]) {
err = transform.ErrShortSrc
break

View file

@ -46,19 +46,9 @@ func Predicate(f func(rune) bool) Set {
// Transformer implements the transform.Transformer interface.
type Transformer struct {
t transform.SpanningTransformer
transform.Transformer
}
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return t.t.Transform(dst, src, atEOF)
}
func (t Transformer) Span(b []byte, atEOF bool) (n int, err error) {
return t.t.Span(b, atEOF)
}
func (t Transformer) Reset() { t.t.Reset() }
// Bytes returns a new byte slice with the result of converting b using t. It
// calls Reset on t. It returns nil if any error was found. This can only happen
// if an error-producing Transformer is passed to If.
@ -106,57 +96,39 @@ type remove func(r rune) bool
func (remove) Reset() {}
// Span implements transform.Spanner.
func (t remove) Span(src []byte, atEOF bool) (n int, err error) {
for r, size := rune(0), 0; n < len(src); {
if r = rune(src[n]); r < utf8.RuneSelf {
size = 1
} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src[n:]) {
err = transform.ErrShortSrc
} else {
err = transform.ErrEndOfSpan
}
break
}
if t(r) {
err = transform.ErrEndOfSpan
break
}
n += size
}
return
}
// Transform implements transform.Transformer.
func (t remove) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for r, size := rune(0), 0; nSrc < len(src); {
if r = rune(src[nSrc]); r < utf8.RuneSelf {
size = 1
} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
break
}
// We replace illegal bytes with RuneError. Not doing so might
// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
// The resulting byte sequence may subsequently contain runes
// for which t(r) is true that were passed unnoticed.
if !t(utf8.RuneError) {
if nDst+3 > len(dst) {
err = transform.ErrShortDst
} else {
r, size = utf8.DecodeRune(src[nSrc:])
if size == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
break
}
dst[nDst+0] = runeErrorString[0]
dst[nDst+1] = runeErrorString[1]
dst[nDst+2] = runeErrorString[2]
nDst += 3
// We replace illegal bytes with RuneError. Not doing so might
// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
// The resulting byte sequence may subsequently contain runes
// for which t(r) is true that were passed unnoticed.
if !t(utf8.RuneError) {
if nDst+3 > len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst+0] = runeErrorString[0]
dst[nDst+1] = runeErrorString[1]
dst[nDst+2] = runeErrorString[2]
nDst += 3
}
nSrc++
continue
}
nSrc++
continue
}
if t(r) {
nSrc += size
continue
@ -185,28 +157,6 @@ type mapper func(rune) rune
func (mapper) Reset() {}
// Span implements transform.Spanner.
func (t mapper) Span(src []byte, atEOF bool) (n int, err error) {
for r, size := rune(0), 0; n < len(src); n += size {
if r = rune(src[n]); r < utf8.RuneSelf {
size = 1
} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src[n:]) {
err = transform.ErrShortSrc
} else {
err = transform.ErrEndOfSpan
}
break
}
if t(r) != r {
err = transform.ErrEndOfSpan
break
}
}
return n, err
}
// Transform implements transform.Transformer.
func (t mapper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
var replacement rune
@ -280,51 +230,24 @@ func ReplaceIllFormed() Transformer {
type replaceIllFormed struct{ transform.NopResetter }
func (t replaceIllFormed) Span(src []byte, atEOF bool) (n int, err error) {
for n < len(src) {
// ASCII fast path.
if src[n] < utf8.RuneSelf {
n++
continue
}
r, size := utf8.DecodeRune(src[n:])
// Look for a valid non-ASCII rune.
if r != utf8.RuneError || size != 1 {
n += size
continue
}
// Look for short source data.
if !atEOF && !utf8.FullRune(src[n:]) {
err = transform.ErrShortSrc
break
}
// We have an invalid rune.
err = transform.ErrEndOfSpan
break
}
return n, err
}
func (t replaceIllFormed) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for nSrc < len(src) {
// ASCII fast path.
if r := src[nSrc]; r < utf8.RuneSelf {
r, size := utf8.DecodeRune(src[nSrc:])
// Look for an ASCII rune.
if r < utf8.RuneSelf {
if nDst == len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst] = r
dst[nDst] = byte(r)
nDst++
nSrc++
continue
}
// Look for a valid non-ASCII rune.
if _, size := utf8.DecodeRune(src[nSrc:]); size != 1 {
if r != utf8.RuneError || size != 1 {
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
err = transform.ErrShortDst
break

View file

@ -123,64 +123,34 @@ var transitions = [...][2]ruleTransition{
// vice versa.
const exclusiveRTL = uint16(1<<bidi.EN | 1<<bidi.AN)
// From RFC 5893
// An RTL label is a label that contains at least one character of type
// R, AL, or AN.
//
// An LTR label is any label that is not an RTL label.
// Direction reports the direction of the given label as defined by RFC 5893.
// The Bidi Rule does not have to be applied to labels of the category
// LeftToRight.
func Direction(b []byte) bidi.Direction {
for i := 0; i < len(b); {
e, sz := bidi.Lookup(b[i:])
if sz == 0 {
i++
// Direction reports the direction of the given label as defined by RFC 5893 or
// an error if b is not a valid label according to the Bidi Rule.
func Direction(b []byte) (bidi.Direction, error) {
t := Transformer{}
if n, ok := t.advance(b); ok && n == len(b) {
switch t.state {
case ruleLTRFinal, ruleInitial:
return bidi.LeftToRight, nil
case ruleRTLFinal:
return bidi.RightToLeft, nil
}
c := e.Class()
if c == bidi.R || c == bidi.AL || c == bidi.AN {
return bidi.RightToLeft
}
i += sz
}
return bidi.LeftToRight
return bidi.Neutral, ErrInvalid
}
// DirectionString reports the direction of the given label as defined by RFC
// 5893. The Bidi Rule does not have to be applied to labels of the category
// LeftToRight.
func DirectionString(s string) bidi.Direction {
for i := 0; i < len(s); {
e, sz := bidi.LookupString(s[i:])
if sz == 0 {
i++
// 5893 or an error if s is not a valid label according to the Bidi Rule.
func DirectionString(s string) (bidi.Direction, error) {
t := Transformer{}
if n, ok := t.advanceString(s); ok && n == len(s) {
switch t.state {
case ruleLTRFinal, ruleInitial:
return bidi.LeftToRight, nil
case ruleRTLFinal:
return bidi.RightToLeft, nil
}
c := e.Class()
if c == bidi.R || c == bidi.AL || c == bidi.AN {
return bidi.RightToLeft
}
i += sz
}
return bidi.LeftToRight
}
// Valid reports whether b conforms to the BiDi rule.
func Valid(b []byte) bool {
var t Transformer
if n, ok := t.advance(b); !ok || n < len(b) {
return false
}
return t.isFinal()
}
// ValidString reports whether s conforms to the BiDi rule.
func ValidString(s string) bool {
var t Transformer
if n, ok := t.advanceString(s); !ok || n < len(s) {
return false
}
return t.isFinal()
return bidi.Neutral, ErrInvalid
}
// New returns a Transformer that verifies that input adheres to the Bidi Rule.
@ -190,23 +160,8 @@ func New() *Transformer {
// Transformer implements transform.Transform.
type Transformer struct {
state ruleState
hasRTL bool
seen uint16
}
// A rule can only be violated for "Bidi Domain names", meaning if one of the
// following categories has been observed.
func (t *Transformer) isRTL() bool {
const isRTL = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN
return t.seen&isRTL != 0
}
func (t *Transformer) isFinal() bool {
if !t.isRTL() {
return true
}
return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
state ruleState
seen uint16
}
// Reset implements transform.Transformer.
@ -230,7 +185,7 @@ func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, er
// Span returns the first n bytes of src that conform to the Bidi rule.
func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error) {
if t.state == ruleInvalid && t.isRTL() {
if t.state == ruleInvalid {
return 0, ErrInvalid
}
n, ok := t.advance(src)
@ -243,7 +198,7 @@ func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error) {
break
}
err = ErrInvalid
case !t.isFinal():
case t.state != ruleLTRFinal && t.state != ruleRTLFinal && t.state != ruleInitial:
err = ErrInvalid
}
return n, err
@ -270,15 +225,12 @@ func (t *Transformer) advance(s []byte) (n int, ok bool) {
e, sz = bidi.Lookup(s[n:])
if sz <= 1 {
if sz == 1 {
// We always consider invalid UTF-8 to be invalid, even if
// the string has not yet been determined to be RTL.
// TODO: is this correct?
return n, false
return n, false // invalid UTF-8
}
return n, true // incomplete UTF-8 encoding
}
}
// TODO: using CompactClass would result in noticeable speedup.
// TODO: using CompactClass results in noticeable speedup.
// See unicode/bidi/prop.go:Properties.CompactClass.
c := uint16(1 << e.Class())
t.seen |= c
@ -293,9 +245,7 @@ func (t *Transformer) advance(s []byte) (n int, ok bool) {
t.state = tr[1].next
default:
t.state = ruleInvalid
if t.isRTL() {
return n, false
}
return n, false
}
n += sz
}
@ -332,9 +282,7 @@ func (t *Transformer) advanceString(s string) (n int, ok bool) {
t.state = tr[1].next
default:
t.state = ruleInvalid
if t.isRTL() {
return n, false
}
return n, false
}
n += sz
}

View file

@ -6,10 +6,10 @@ package precis
import (
"golang.org/x/text/cases"
"golang.org/x/text/language"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
"golang.org/x/text/width"
)
// An Option is used to define the behavior and rules of a Profile.
@ -20,12 +20,11 @@ type options struct {
foldWidth bool
// Enforcement options
asciiLower bool
cases transform.SpanningTransformer
cases transform.Transformer
disallow runes.Set
norm transform.SpanningTransformer
additional []func() transform.SpanningTransformer
width transform.SpanningTransformer
norm norm.Form
additional []func() transform.Transformer
width *width.Transformer
disallowEmpty bool
bidiRule bool
@ -37,11 +36,6 @@ func getOpts(o ...Option) (res options) {
for _, f := range o {
f(&res)
}
// Using a SpanningTransformer, instead of norm.Form prevents an allocation
// down the road.
if res.norm == nil {
res.norm = norm.NFC
}
return
}
@ -80,36 +74,11 @@ var (
}
)
// TODO: move this logic to package transform
type spanWrap struct{ transform.Transformer }
func (s spanWrap) Span(src []byte, atEOF bool) (n int, err error) {
return 0, transform.ErrEndOfSpan
}
// TODO: allow different types? For instance:
// func() transform.Transformer
// func() transform.SpanningTransformer
// func([]byte) bool // validation only
//
// Also, would be great if we could detect if a transformer is reentrant.
// The AdditionalMapping option defines the additional mapping rule for the
// Profile by applying Transformer's in sequence.
func AdditionalMapping(t ...func() transform.Transformer) Option {
return func(o *options) {
for _, f := range t {
sf := func() transform.SpanningTransformer {
return f().(transform.SpanningTransformer)
}
if _, ok := f().(transform.SpanningTransformer); !ok {
sf = func() transform.SpanningTransformer {
return spanWrap{f()}
}
}
o.additional = append(o.additional, sf)
}
o.additional = t
}
}
@ -124,26 +93,10 @@ func Norm(f norm.Form) Option {
// provided to determine the type of case folding used.
func FoldCase(opts ...cases.Option) Option {
return func(o *options) {
o.asciiLower = true
o.cases = cases.Fold(opts...)
}
}
// The LowerCase option defines a Profile's case mapping rule. Options can be
// provided to determine the type of case folding used.
func LowerCase(opts ...cases.Option) Option {
return func(o *options) {
o.asciiLower = true
if len(opts) == 0 {
o.cases = cases.Lower(language.Und, cases.HandleFinalSigma(false))
return
}
opts = append([]cases.Option{cases.HandleFinalSigma(false)}, opts...)
o.cases = cases.Lower(language.Und, opts...)
}
}
// The Disallow option further restricts a Profile's allowed characters beyond
// what is disallowed by the underlying string class.
func Disallow(set runes.Set) Option {

View file

@ -5,12 +5,9 @@
package precis
import (
"bytes"
"errors"
"unicode/utf8"
"golang.org/x/text/cases"
"golang.org/x/text/language"
"golang.org/x/text/runes"
"golang.org/x/text/secure/bidirule"
"golang.org/x/text/transform"
@ -93,80 +90,32 @@ type buffers struct {
next int
}
func (b *buffers) apply(t transform.SpanningTransformer) (err error) {
n, err := t.Span(b.src, true)
if err != transform.ErrEndOfSpan {
return err
}
func (b *buffers) init(n int) {
b.buf[0] = make([]byte, 0, n)
b.buf[1] = make([]byte, 0, n)
}
func (b *buffers) apply(t transform.Transformer) (err error) {
// TODO: use Span, once available.
x := b.next & 1
if b.buf[x] == nil {
b.buf[x] = make([]byte, 0, 8+len(b.src)+len(b.src)>>2)
}
span := append(b.buf[x][:0], b.src[:n]...)
b.src, _, err = transform.Append(t, span, b.src[n:])
b.src, _, err = transform.Append(t, b.buf[x][:0], b.src)
b.buf[x] = b.src
b.next++
return err
}
// Pre-allocate transformers when possible. In some cases this avoids allocation.
var (
foldWidthT transform.SpanningTransformer = width.Fold
lowerCaseT transform.SpanningTransformer = cases.Lower(language.Und, cases.HandleFinalSigma(false))
)
// TODO: make this a method on profile.
func (b *buffers) enforce(p *Profile, src []byte, comparing bool) (str []byte, err error) {
func (b *buffers) enforce(p *Profile, src []byte) (str []byte, err error) {
b.src = src
ascii := true
for _, c := range src {
if c >= utf8.RuneSelf {
ascii = false
break
}
}
// ASCII fast path.
if ascii {
for _, f := range p.options.additional {
if err = b.apply(f()); err != nil {
return nil, err
}
}
switch {
case p.options.asciiLower || (comparing && p.options.ignorecase):
for i, c := range b.src {
if 'A' <= c && c <= 'Z' {
b.src[i] = c ^ 1<<5
}
}
case p.options.cases != nil:
b.apply(p.options.cases)
}
c := checker{p: p}
if _, err := c.span(b.src, true); err != nil {
return nil, err
}
if p.disallow != nil {
for _, c := range b.src {
if p.disallow.Contains(rune(c)) {
return nil, errDisallowedRune
}
}
}
if p.options.disallowEmpty && len(b.src) == 0 {
return nil, errEmptyString
}
return b.src, nil
}
// These transforms are applied in the order defined in
// https://tools.ietf.org/html/rfc7564#section-7
// TODO: allow different width transforms options.
if p.options.foldWidth || (p.options.ignorecase && comparing) {
b.apply(foldWidthT)
if p.options.foldWidth {
// TODO: use Span, once available.
if err = b.apply(width.Fold); err != nil {
return nil, err
}
}
for _, f := range p.options.additional {
if err = b.apply(f()); err != nil {
@ -174,14 +123,24 @@ func (b *buffers) enforce(p *Profile, src []byte, comparing bool) (str []byte, e
}
}
if p.options.cases != nil {
b.apply(p.options.cases)
if err = b.apply(p.options.cases); err != nil {
return nil, err
}
}
if comparing && p.options.ignorecase {
b.apply(lowerCaseT)
if n := p.norm.QuickSpan(b.src); n < len(b.src) {
x := b.next & 1
n = copy(b.buf[x], b.src[:n])
b.src, _, err = transform.Append(p.norm, b.buf[x][:n], b.src[n:])
b.buf[x] = b.src
b.next++
if err != nil {
return nil, err
}
}
b.apply(p.norm)
if p.options.bidiRule && !bidirule.Valid(b.src) {
return nil, bidirule.ErrInvalid
if p.options.bidiRule {
if err := b.apply(bidirule.New()); err != nil {
return nil, err
}
}
c := checker{p: p}
if _, err := c.span(b.src, true); err != nil {
@ -196,6 +155,9 @@ func (b *buffers) enforce(p *Profile, src []byte, comparing bool) (str []byte, e
i += size
}
}
// TODO: Add the disallow empty rule with a dummy transformer?
if p.options.disallowEmpty && len(b.src) == 0 {
return nil, errEmptyString
}
@ -206,16 +168,19 @@ func (b *buffers) enforce(p *Profile, src []byte, comparing bool) (str []byte, e
// It returns an error if the input string is invalid.
func (p *Profile) Append(dst, src []byte) ([]byte, error) {
var buf buffers
b, err := buf.enforce(p, src, false)
buf.init(8 + len(src) + len(src)>>2)
b, err := buf.enforce(p, src)
if err != nil {
return nil, err
}
return append(dst, b...), nil
}
func processBytes(p *Profile, b []byte, key bool) ([]byte, error) {
// Bytes returns a new byte slice with the result of applying the profile to b.
func (p *Profile) Bytes(b []byte) ([]byte, error) {
var buf buffers
b, err := buf.enforce(p, b, key)
buf.init(8 + len(b) + len(b)>>2)
b, err := buf.enforce(p, b)
if err != nil {
return nil, err
}
@ -227,62 +192,39 @@ func processBytes(p *Profile, b []byte, key bool) ([]byte, error) {
return b, nil
}
// Bytes returns a new byte slice with the result of applying the profile to b.
func (p *Profile) Bytes(b []byte) ([]byte, error) {
return processBytes(p, b, false)
}
// AppendCompareKey appends the result of applying p to src (including any
// optional rules to make strings comparable or useful in a map key such as
// applying lowercasing) writing the result to dst. It returns an error if the
// input string is invalid.
func (p *Profile) AppendCompareKey(dst, src []byte) ([]byte, error) {
// String returns a string with the result of applying the profile to s.
func (p *Profile) String(s string) (string, error) {
var buf buffers
b, err := buf.enforce(p, src, true)
if err != nil {
return nil, err
}
return append(dst, b...), nil
}
func processString(p *Profile, s string, key bool) (string, error) {
var buf buffers
b, err := buf.enforce(p, []byte(s), key)
buf.init(8 + len(s) + len(s)>>2)
b, err := buf.enforce(p, []byte(s))
if err != nil {
return "", err
}
return string(b), nil
}
// String returns a string with the result of applying the profile to s.
func (p *Profile) String(s string) (string, error) {
return processString(p, s, false)
}
// CompareKey returns a string that can be used for comparison, hashing, or
// collation.
func (p *Profile) CompareKey(s string) (string, error) {
return processString(p, s, true)
}
// Compare enforces both strings, and then compares them for bit-string identity
// (byte-for-byte equality). If either string cannot be enforced, the comparison
// is false.
func (p *Profile) Compare(a, b string) bool {
var buf buffers
akey, err := buf.enforce(p, []byte(a), true)
a, err := p.String(a)
if err != nil {
return false
}
b, err = p.String(b)
if err != nil {
return false
}
buf = buffers{}
bkey, err := buf.enforce(p, []byte(b), true)
if err != nil {
return false
// TODO: This is out of order. Need to extract the transformation logic and
// put this in where the normal case folding would go (but only for
// comparison).
if p.options.ignorecase {
a = width.Fold.String(a)
b = width.Fold.String(a)
}
return bytes.Compare(akey, bkey) == 0
return a == b
}
// Allowed returns a runes.Set containing every rune that is a member of the

View file

@ -19,51 +19,38 @@ var (
OpaqueString *Profile = opaquestring // Implements the OpaqueString profile defined in RFC 7613 for passwords and other secure labels.
)
// TODO: mvl: "Ultimately, I would manually define the structs for the internal
// profiles. This avoid pulling in unneeded tables when they are not used."
var (
nickname = &Profile{
options: getOpts(
AdditionalMapping(func() transform.Transformer {
return &nickAdditionalMapping{}
}),
IgnoreCase,
Norm(norm.NFKC),
DisallowEmpty,
),
class: freeform,
}
usernameCaseMap = &Profile{
options: getOpts(
FoldWidth,
LowerCase(),
Norm(norm.NFC),
BidiRule,
),
class: identifier,
}
usernameNoCaseMap = &Profile{
options: getOpts(
FoldWidth,
Norm(norm.NFC),
BidiRule,
),
class: identifier,
}
opaquestring = &Profile{
options: getOpts(
AdditionalMapping(func() transform.Transformer {
return mapSpaces
}),
Norm(norm.NFC),
DisallowEmpty,
),
class: freeform,
}
nickname = NewFreeform(
AdditionalMapping(func() transform.Transformer {
return &nickAdditionalMapping{}
}),
IgnoreCase,
Norm(norm.NFKC),
DisallowEmpty,
)
usernameCaseMap = NewIdentifier(
FoldWidth,
FoldCase(),
Norm(norm.NFC),
BidiRule,
)
usernameNoCaseMap = NewIdentifier(
FoldWidth,
Norm(norm.NFC),
BidiRule,
)
opaquestring = NewFreeform(
AdditionalMapping(func() transform.Transformer {
return runes.Map(func(r rune) rune {
if unicode.Is(unicode.Zs, r) {
return ' '
}
return r
})
}),
Norm(norm.NFC),
DisallowEmpty,
)
)
// mapSpaces is a shared value of a runes.Map transformer.
var mapSpaces transform.Transformer = runes.Map(func(r rune) rune {
if unicode.Is(unicode.Zs, r) {
return ' '
}
return r
})

View file

@ -24,10 +24,6 @@ var (
// complete the transformation.
ErrShortSrc = errors.New("transform: short source buffer")
// ErrEndOfSpan means that the input and output (the transformed input)
// are not identical.
ErrEndOfSpan = errors.New("transform: input and output are not identical")
// errInconsistentByteCount means that Transform returned success (nil
// error) but also returned nSrc inconsistent with the src argument.
errInconsistentByteCount = errors.New("transform: inconsistent byte count returned")
@ -64,41 +60,6 @@ type Transformer interface {
Reset()
}
// SpanningTransformer extends the Transformer interface with a Span method
// that determines how much of the input already conforms to the Transformer.
type SpanningTransformer interface {
Transformer
// Span returns a position in src such that transforming src[:n] results in
// identical output src[:n] for these bytes. It does not necessarily return
// the largest such n. The atEOF argument tells whether src represents the
// last bytes of the input.
//
// Callers should always account for the n bytes consumed before
// considering the error err.
//
// A nil error means that all input bytes are known to be identical to the
// output produced by the Transformer. A nil error can be be returned
// regardless of whether atEOF is true. If err is nil, then then n must
// equal len(src); the converse is not necessarily true.
//
// ErrEndOfSpan means that the Transformer output may differ from the
// input after n bytes. Note that n may be len(src), meaning that the output
// would contain additional bytes after otherwise identical output.
// ErrShortSrc means that src had insufficient data to determine whether the
// remaining bytes would change. Other than the error conditions listed
// here, implementations are free to report other errors that arise.
//
// Calling Span can modify the Transformer state as a side effect. In
// effect, it does the transformation just as calling Transform would, only
// without copying to a destination buffer and only up to a point it can
// determine the input and output bytes are the same. This is obviously more
// limited than calling Transform, but can be more efficient in terms of
// copying and allocating buffers. Calls to Span and Transform may be
// interleaved.
Span(src []byte, atEOF bool) (n int, err error)
}
// NopResetter can be embedded by implementations of Transformer to add a nop
// Reset method.
type NopResetter struct{}
@ -317,10 +278,6 @@ func (nop) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return n, n, err
}
func (nop) Span(src []byte, atEOF bool) (n int, err error) {
return len(src), nil
}
type discard struct{ NopResetter }
func (discard) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@ -332,8 +289,8 @@ var (
// by consuming all bytes and writing nothing.
Discard Transformer = discard{}
// Nop is a SpanningTransformer that copies src to dst.
Nop SpanningTransformer = nop{}
// Nop is a Transformer that copies src to dst.
Nop Transformer = nop{}
)
// chain is a sequence of links. A chain with N Transformers has N+1 links and
@ -401,8 +358,6 @@ func (c *chain) Reset() {
}
}
// TODO: make chain use Span (is going to be fun to implement!)
// Transform applies the transformers of c in sequence.
func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
// Set up src and dst in the chain.
@ -493,7 +448,8 @@ func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err erro
return dstL.n, srcL.p, err
}
// Deprecated: use runes.Remove instead.
// RemoveFunc returns a Transformer that removes from the input all runes r for
// which f(r) is true. Illegal bytes in the input are replaced by RuneError.
func RemoveFunc(f func(r rune) bool) Transformer {
return removeF(f)
}

View file

@ -84,7 +84,7 @@ func resolvePairedBrackets(s *isolatingRunSequence) {
dirEmbed = R
}
p.locateBrackets(s.p.pairTypes, s.p.pairValues)
p.resolveBrackets(dirEmbed, s.p.initialTypes)
p.resolveBrackets(dirEmbed)
}
type bracketPairer struct {
@ -125,8 +125,6 @@ func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool
return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]]
}
const maxPairingDepth = 63
// locateBrackets locates matching bracket pairs according to BD16.
//
// This implementation uses a linked list instead of a stack, because, while
@ -138,17 +136,11 @@ func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []run
for i, index := range p.indexes {
// look at the bracket type for each character
if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON {
// continue scanning
continue
}
switch pairTypes[index] {
case bpNone:
// continue scanning
case bpOpen:
// check if maximum pairing depth reached
if p.openers.Len() == maxPairingDepth {
p.openers.Init()
return
}
// remember opener location, most recent first
p.openers.PushFront(i)
@ -278,7 +270,7 @@ func (p *bracketPairer) classBeforePair(loc bracketPair) Class {
}
// assignBracketType implements rule N0 for a single bracket pair.
func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) {
func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class) {
// rule "N0, a", inspect contents of pair
dirPair := p.classifyPairContent(loc, dirEmbed)
@ -303,33 +295,13 @@ func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initi
// direction
// set the bracket types to the type found
p.setBracketsToType(loc, dirPair, initialTypes)
}
func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) {
p.codesIsolatedRun[loc.opener] = dirPair
p.codesIsolatedRun[loc.closer] = dirPair
for i := loc.opener + 1; i < loc.closer; i++ {
index := p.indexes[i]
if initialTypes[index] != NSM {
break
}
p.codesIsolatedRun[i] = dirPair
}
for i := loc.closer + 1; i < len(p.indexes); i++ {
index := p.indexes[i]
if initialTypes[index] != NSM {
break
}
p.codesIsolatedRun[i] = dirPair
}
}
// resolveBrackets implements rule N0 for a list of pairs.
func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) {
func (p *bracketPairer) resolveBrackets(dirEmbed Class) {
for _, loc := range p.pairPositions {
p.assignBracketType(loc, dirEmbed, initialTypes)
p.assignBracketType(loc, dirEmbed)
}
}

View file

@ -309,9 +309,6 @@ func (p *paragraph) determineExplicitEmbeddingLevels() {
}
if isIsolate {
p.resultLevels[i] = stack.lastEmbeddingLevel()
if stack.lastDirectionalOverrideStatus() != ON {
p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
}
}
var newLevel level

View file

@ -8,11 +8,7 @@
// Package norm contains types and functions for normalizing Unicode strings.
package norm // import "golang.org/x/text/unicode/norm"
import (
"unicode/utf8"
"golang.org/x/text/transform"
)
import "unicode/utf8"
// A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are:
@ -267,34 +263,6 @@ func (f Form) QuickSpan(b []byte) int {
return n
}
// Span implements transform.SpanningTransformer. It returns a boundary n such
// that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
if n < len(b) {
if !ok {
err = transform.ErrEndOfSpan
} else {
err = transform.ErrShortSrc
}
}
return n, err
}
// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
if n < len(s) {
if !ok {
err = transform.ErrEndOfSpan
} else {
err = transform.ErrShortSrc
}
}
return n, err
}
// quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and
// whether any non-normalized parts were found. If atEOF is false, n will
// not point past the last segment if this segment might be become
@ -353,7 +321,7 @@ func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool)
return lastSegStart, false
}
// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// QuickSpanString returns a boundary n such that b[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpanString(s string) int {
n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true)

View file

@ -112,6 +112,7 @@ func (r *normReader) Read(p []byte) (int, error) {
}
}
}
panic("should not reach here")
}
// Reader returns a new reader that implements Read

View file

@ -14,32 +14,6 @@ type foldTransform struct {
transform.NopResetter
}
func (foldTransform) Span(src []byte, atEOF bool) (n int, err error) {
for n < len(src) {
if src[n] < utf8.RuneSelf {
// ASCII fast path.
for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
}
continue
}
v, size := trie.lookup(src[n:])
if size == 0 { // incomplete UTF-8 encoding
if !atEOF {
err = transform.ErrShortSrc
} else {
n = len(src)
}
break
}
if elem(v)&tagNeedsFold != 0 {
err = transform.ErrEndOfSpan
break
}
n += size
}
return n, err
}
func (foldTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for nSrc < len(src) {
if src[nSrc] < utf8.RuneSelf {
@ -96,33 +70,6 @@ type narrowTransform struct {
transform.NopResetter
}
func (narrowTransform) Span(src []byte, atEOF bool) (n int, err error) {
for n < len(src) {
if src[n] < utf8.RuneSelf {
// ASCII fast path.
for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
}
continue
}
v, size := trie.lookup(src[n:])
if size == 0 { // incomplete UTF-8 encoding
if !atEOF {
err = transform.ErrShortSrc
} else {
n = len(src)
}
break
}
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
} else {
err = transform.ErrEndOfSpan
break
}
n += size
}
return n, err
}
func (narrowTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for nSrc < len(src) {
if src[nSrc] < utf8.RuneSelf {
@ -179,30 +126,6 @@ type wideTransform struct {
transform.NopResetter
}
func (wideTransform) Span(src []byte, atEOF bool) (n int, err error) {
for n < len(src) {
// TODO: Consider ASCII fast path. Special-casing ASCII handling can
// reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
// not enough to warrant the extra code and complexity.
v, size := trie.lookup(src[n:])
if size == 0 { // incomplete UTF-8 encoding
if !atEOF {
err = transform.ErrShortSrc
} else {
n = len(src)
}
break
}
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
} else {
err = transform.ErrEndOfSpan
break
}
n += size
}
return n, err
}
func (wideTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for nSrc < len(src) {
// TODO: Consider ASCII fast path. Special-casing ASCII handling can

View file

@ -153,22 +153,17 @@ func (p Properties) Wide() rune {
// Transformer implements the transform.Transformer interface.
type Transformer struct {
t transform.SpanningTransformer
t transform.Transformer
}
// Reset implements the transform.Transformer interface.
func (t Transformer) Reset() { t.t.Reset() }
// Transform implements the transform.Transformer interface.
// Transform implements the Transformer interface.
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return t.t.Transform(dst, src, atEOF)
}
// Span implements the transform.SpanningTransformer interface.
func (t Transformer) Span(src []byte, atEOF bool) (n int, err error) {
return t.t.Span(src, atEOF)
}
// Bytes returns a new byte slice with the result of applying t to b.
func (t Transformer) Bytes(b []byte) []byte {
b, _, _ = transform.Bytes(t, b)