textutil/rune.go - jiri - Git at Google

 // Copyright 2015 The Vanadium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package textutil

 import (
 	"bytes"
 )

 // TODO(toddw): Add UTF16 support.

 // Special rune values exported by this package.
 const (
 	// EOF is returned by RuneChunkDecoder methods in place of a decoded rune;
 	// WriteRuneChunk and FlushRuneChunk stop when they see it.
 	EOF                = rune(-1) // Indicates the end of a rune stream.
 	LineSeparator      = '\u2028' // Unicode line separator rune.
 	ParagraphSeparator = '\u2029' // Unicode paragraph separator rune.
 )

 // RuneEncoder is the interface to an encoder of a stream of runes into a
 // bytes.Buffer.  The byte encoding produced is chosen by the implementation.
 type RuneEncoder interface {
 	// Encode encodes r into buf.  It has no return value, so implementations
 	// have no way to report an encoding failure to the caller.
 	Encode(r rune, buf *bytes.Buffer)
 }

 // RuneChunkDecoder is the interface to a decoder of a stream of encoded runes
 // that may be arbitrarily chunked.
 //
 // Implementations of RuneChunkDecoder are commonly used to implement io.Writer
 // wrappers, to handle buffering when chunk boundaries may occur in the middle
 // of an encoded rune.  WriteRuneChunk and FlushRuneChunk are helpers that
 // drive a decoder for that purpose.
 type RuneChunkDecoder interface {
 	// DecodeRune returns the next rune in chunk, and its width in bytes.  If
 	// chunk represents a partial rune, the chunk is buffered and DecodeRune
 	// returns EOF and the size of the chunk.  Subsequent calls to DecodeRune
 	// will combine previously buffered data when decoding.
 	DecodeRune(chunk []byte) (r rune, n int)
 	// FlushRune returns the next buffered rune.  It returns EOF once all
 	// buffered data has been returned.
 	FlushRune() rune
 }

 // WriteRuneChunk feeds chunk through d one rune at a time, handing each decoded
 // rune to fn.  Decoding stops when the whole chunk has been consumed, when
 // d.DecodeRune reports EOF, or when fn fails.
 //
 // The returned count is the number of bytes of chunk that were handed to the
 // decoder and consumed by it; this includes the bytes of the rune for which fn
 // returned an error, and any trailing bytes consumed together with EOF.  When
 // fn returns an error, that error is returned and no further data is
 // processed.
 //
 // It exists to make io.Writer implementations on top of a RuneChunkDecoder
 // straightforward.
 func WriteRuneChunk(d RuneChunkDecoder, fn func(rune) error, chunk []byte) (int, error) {
 	var consumed int
 	for consumed < len(chunk) {
 		next, width := d.DecodeRune(chunk[consumed:])
 		consumed += width
 		if next == EOF {
 			// The decoder has nothing more to emit for this chunk.
 			return consumed, nil
 		}
 		if err := fn(next); err != nil {
 			return consumed, err
 		}
 	}
 	return consumed, nil
 }

 // FlushRuneChunk drains d by calling d.FlushRune until it reports EOF, passing
 // every rune obtained to fn.  The first error returned by fn is returned
 // immediately and nothing further is flushed; otherwise the result is nil.
 //
 // It is intended for io.Writer implementations built on a RuneChunkDecoder
 // that also offer a Flush() call.
 func FlushRuneChunk(d RuneChunkDecoder, fn func(rune) error) error {
 	for next := d.FlushRune(); next != EOF; next = d.FlushRune() {
 		if err := fn(next); err != nil {
 			return err
 		}
 	}
 	return nil
 }

 // bytePos and runePos distinguish positions that are used in either domain;
 // we're trying to avoid silly mistakes like adding a bytePos to a runePos.
 // Because they are distinct named types, combining the two requires an
 // explicit conversion.
 type bytePos int // A position or length counted in bytes.
 type runePos int // A position or length counted in runes.

 // byteRuneBuffer maintains a buffer with both byte and rune based positions.
 // The byte length is that of the encoded data; the rune length is a counter
 // maintained separately by the write methods.
 type byteRuneBuffer struct {
 	// enc encodes every written rune into buf.
 	enc     RuneEncoder
 	// buf accumulates the encoded bytes.
 	buf     bytes.Buffer
 	// runeLen counts runes written through WriteRune and WriteString;
 	// WriteString0Runes deliberately leaves it untouched.
 	runeLen runePos
 }

 // ByteLen returns the number of encoded bytes currently held in b.
 func (b *byteRuneBuffer) ByteLen() bytePos {
 	size := b.buf.Len()
 	return bytePos(size)
 }

 // RuneLen returns the rune count tracked by b.
 func (b *byteRuneBuffer) RuneLen() runePos {
 	return b.runeLen
 }

 // Bytes returns the encoded bytes currently held in b, as exposed by the
 // underlying bytes.Buffer.
 func (b *byteRuneBuffer) Bytes() []byte {
 	return b.buf.Bytes()
 }

 // Reset empties b: the rune count goes back to zero and the underlying byte
 // buffer is reset.
 func (b *byteRuneBuffer) Reset() {
 	b.runeLen = 0
 	b.buf.Reset()
 }

 // WriteRune encodes r into b's byte buffer using b.enc, then counts it as one
 // additional rune.
 func (b *byteRuneBuffer) WriteRune(r rune) {
 	sink := &b.buf
 	b.enc.Encode(r, sink)
 	b.runeLen += 1
 }

 // WriteString encodes every rune of str into b, counting each one towards the
 // rune length.
 func (b *byteRuneBuffer) WriteString(str string) {
 	for _, ch := range str {
 		// Same steps as WriteRune, spelled out inline.
 		b.enc.Encode(ch, &b.buf)
 		b.runeLen++
 	}
 }

 // WriteString0Runes encodes every rune of str into b's byte buffer while
 // leaving the rune length unchanged, so the written text contributes bytes
 // but zero runes.
 func (b *byteRuneBuffer) WriteString0Runes(str string) {
 	for _, ch := range str {
 		sink := &b.buf
 		b.enc.Encode(ch, sink)
 	}
 }
	// Copyright 2015 The Vanadium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package textutil

	import (
	"bytes"
	)

	// TODO(toddw): Add UTF16 support.

	// Special rune values exported by this package.
	const (
	// EOF is returned by RuneChunkDecoder methods in place of a decoded rune;
	// WriteRuneChunk and FlushRuneChunk stop when they see it.
	EOF = rune(-1) // Indicates the end of a rune stream.
	LineSeparator = '\u2028' // Unicode line separator rune.
	ParagraphSeparator = '\u2029' // Unicode paragraph separator rune.
	)

	// RuneEncoder is the interface to an encoder of a stream of runes into a
	// bytes.Buffer. The byte encoding produced is chosen by the implementation.
	type RuneEncoder interface {
	// Encode encodes r into buf. It has no return value, so implementations
	// have no way to report an encoding failure to the caller.
	Encode(r rune, buf *bytes.Buffer)
	}

	// RuneChunkDecoder is the interface to a decoder of a stream of encoded runes
	// that may be arbitrarily chunked.
	//
	// Implementations of RuneChunkDecoder are commonly used to implement io.Writer
	// wrappers, to handle buffering when chunk boundaries may occur in the middle
	// of an encoded rune. WriteRuneChunk and FlushRuneChunk are helpers that
	// drive a decoder for that purpose.
	type RuneChunkDecoder interface {
	// DecodeRune returns the next rune in chunk, and its width in bytes. If
	// chunk represents a partial rune, the chunk is buffered and DecodeRune
	// returns EOF and the size of the chunk. Subsequent calls to DecodeRune
	// will combine previously buffered data when decoding.
	DecodeRune(chunk []byte) (r rune, n int)
	// FlushRune returns the next buffered rune. It returns EOF once all
	// buffered data has been returned.
	FlushRune() rune
	}

	// WriteRuneChunk feeds chunk through d one rune at a time, handing each decoded
	// rune to fn. Decoding stops when the whole chunk has been consumed, when
	// d.DecodeRune reports EOF, or when fn fails.
	//
	// The returned count is the number of bytes of chunk that were handed to the
	// decoder and consumed by it; this includes the bytes of the rune for which fn
	// returned an error, and any trailing bytes consumed together with EOF. When
	// fn returns an error, that error is returned and no further data is
	// processed.
	//
	// It exists to make io.Writer implementations on top of a RuneChunkDecoder
	// straightforward.
	func WriteRuneChunk(d RuneChunkDecoder, fn func(rune) error, chunk []byte) (int, error) {
		var consumed int
		for consumed < len(chunk) {
			next, width := d.DecodeRune(chunk[consumed:])
			consumed += width
			if next == EOF {
				// The decoder has nothing more to emit for this chunk.
				return consumed, nil
			}
			if err := fn(next); err != nil {
				return consumed, err
			}
		}
		return consumed, nil
	}

	// FlushRuneChunk drains d by calling d.FlushRune until it reports EOF, passing
	// every rune obtained to fn. The first error returned by fn is returned
	// immediately and nothing further is flushed; otherwise the result is nil.
	//
	// It is intended for io.Writer implementations built on a RuneChunkDecoder
	// that also offer a Flush() call.
	func FlushRuneChunk(d RuneChunkDecoder, fn func(rune) error) error {
		for next := d.FlushRune(); next != EOF; next = d.FlushRune() {
			if err := fn(next); err != nil {
				return err
			}
		}
		return nil
	}

	// bytePos and runePos distinguish positions that are used in either domain;
	// we're trying to avoid silly mistakes like adding a bytePos to a runePos.
	// Because they are distinct named types, combining the two requires an
	// explicit conversion.
	type bytePos int // A position or length counted in bytes.
	type runePos int // A position or length counted in runes.

	// byteRuneBuffer maintains a buffer with both byte and rune based positions.
	// The byte length is that of the encoded data; the rune length is a counter
	// maintained separately by the write methods.
	type byteRuneBuffer struct {
	// enc encodes every written rune into buf.
	enc RuneEncoder
	// buf accumulates the encoded bytes.
	buf bytes.Buffer
	// runeLen counts runes written through WriteRune and WriteString;
	// WriteString0Runes deliberately leaves it untouched.
	runeLen runePos
	}

	// ByteLen returns the number of encoded bytes currently held in b.
	func (b *byteRuneBuffer) ByteLen() bytePos {
		size := b.buf.Len()
		return bytePos(size)
	}

	// RuneLen returns the rune count tracked by b.
	func (b *byteRuneBuffer) RuneLen() runePos {
		return b.runeLen
	}

	// Bytes returns the encoded bytes currently held in b, as exposed by the
	// underlying bytes.Buffer.
	func (b *byteRuneBuffer) Bytes() []byte {
		return b.buf.Bytes()
	}

	// Reset empties b: the rune count goes back to zero and the underlying byte
	// buffer is reset.
	func (b *byteRuneBuffer) Reset() {
		b.runeLen = 0
		b.buf.Reset()
	}

	// WriteRune encodes r into b's byte buffer using b.enc, then counts it as one
	// additional rune.
	func (b *byteRuneBuffer) WriteRune(r rune) {
		sink := &b.buf
		b.enc.Encode(r, sink)
		b.runeLen += 1
	}

	// WriteString encodes every rune of str into b, counting each one towards the
	// rune length.
	func (b *byteRuneBuffer) WriteString(str string) {
		for _, ch := range str {
			// Same steps as WriteRune, spelled out inline.
			b.enc.Encode(ch, &b.buf)
			b.runeLen++
		}
	}

	// WriteString0Runes encodes every rune of str into b's byte buffer while
	// leaving the rune length unchanged, so the written text contributes bytes
	// but zero runes.
	func (b *byteRuneBuffer) WriteString0Runes(str string) {
		for _, ch := range str {
			sink := &b.buf
			b.enc.Encode(ch, sink)
		}
	}