Retro68/gcc/libgo/go/encoding/csv/reader.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package csv reads and writes comma-separated values (CSV) files.
//
// A csv file contains zero or more records of one or more fields per record.
// Each record is separated by the newline character. The final record may
// optionally be followed by a newline character.
//
//	field1,field2,field3
//
// White space is considered part of a field.
//
// Carriage returns before newline characters are silently removed.
//
// Blank lines are ignored.  A line with only whitespace characters (excluding
// the ending newline character) is not considered a blank line.
//
// Fields which start and stop with the quote character " are called
// quoted-fields.  The beginning and ending quote are not part of the
// field.
//
// The source:
//
//	normal string,"quoted-field"
//
// results in the fields
//
//	{`normal string`, `quoted-field`}
//
// Within a quoted-field a quote character followed by a second quote
// character is considered a single quote.
//
//	"the ""word"" is true","a ""quoted-field"""
//
// results in
//
//	{`the "word" is true`, `a "quoted-field"`}
//
// Newlines and commas may be included in a quoted-field
//
//	"Multi-line
//	field","comma is ,"
//
// results in
//
//	{`Multi-line
//	field`, `comma is ,`}
package csv

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"io"
	"unicode"
)

// A ParseError is returned for parsing errors.
// The first line is 1.  The first column is 0.
//
// Err holds the underlying cause.  For errors created by a Reader it is
// one of ErrTrailingComma, ErrBareQuote, ErrQuote or ErrFieldCount.
type ParseError struct {
	Line   int   // Line where the error occurred
	Column int   // Column (rune index) where the error occurred
	Err    error // The actual error
}

// Error renders the error as "line L, column C: cause".
func (e *ParseError) Error() string {
	return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Err)
}

// Unwrap returns the underlying cause stored in Err.
//
// It lets error-inspection helpers that follow the Unwrap convention
// (such as errors.Is and errors.As) see through the position wrapper,
// so a caller can test for a sentinel like ErrFieldCount without a
// type assertion on *ParseError.
func (e *ParseError) Unwrap() error {
	return e.Err
}

// These are the errors that can be returned in ParseError.Err.
//
// ErrTrailingComma is reported when a delimiter is followed by the end of
// the line or of the input while Reader.TrailingComma is false.
//
// ErrBareQuote is reported when a quote character appears inside an
// unquoted field while Reader.LazyQuotes is false.
//
// ErrQuote is reported, while Reader.LazyQuotes is false, when the input
// ends inside a quoted field or when a quote inside a quoted field is
// followed by something other than a quote, the delimiter or a newline.
//
// ErrFieldCount is reported by Read when a record does not have
// Reader.FieldsPerRecord fields.
var (
	ErrTrailingComma = errors.New("extra delimiter at end of line")
	ErrBareQuote     = errors.New("bare \" in non-quoted-field")
	ErrQuote         = errors.New("extraneous \" in field")
	ErrFieldCount    = errors.New("wrong number of fields in line")
)

// A Reader reads records from a CSV-encoded file.
//
// As returned by NewReader, a Reader expects input conforming to RFC 4180.
// The exported fields can be changed to customize the details before the
// first call to Read or ReadAll.
//
// Comma is the field delimiter.  It defaults to ','.
//
// Comment, if not 0, is the comment character. Lines beginning with the
// Comment character are ignored.
//
// If FieldsPerRecord is positive, Read requires each record to
// have the given number of fields.  If FieldsPerRecord is 0, Read sets it to
// the number of fields in the first record, so that future records must
// have the same field count.  If FieldsPerRecord is negative, Read performs
// no field-count check at all.
//
// If LazyQuotes is true, a quote may appear in an unquoted field and a
// non-doubled quote may appear in a quoted field.
//
// If TrailingComma is true, the last field may be an unquoted empty field.
// If it is false, a delimiter that is followed by the end of the line or
// of the input is reported as ErrTrailingComma.
//
// If TrimLeadingSpace is true, leading white space in a field is ignored.
type Reader struct {
	Comma            rune // Field delimiter (set to ',' by NewReader)
	Comment          rune // Comment character for start of line
	FieldsPerRecord  int  // Number of expected fields per record
	LazyQuotes       bool // Allow lazy quotes
	TrailingComma    bool // Allow trailing comma
	TrimLeadingSpace bool // Trim leading space
	line             int
	column           int
	r                *bufio.Reader
	field            bytes.Buffer
}

// NewReader returns a new Reader that reads from r.
//
// The input is wrapped in a bufio.Reader and the delimiter is set to ','.
// Every other option is left at its zero value.
func NewReader(r io.Reader) *Reader {
	rd := new(Reader)
	rd.Comma = ','
	rd.r = bufio.NewReader(r)
	return rd
}

// error wraps err in a ParseError stamped with the reader's current
// line and column.
func (r *Reader) error(err error) error {
	pe := new(ParseError)
	pe.Line, pe.Column = r.line, r.column
	pe.Err = err
	return pe
}

// Read reads one record from r.  The record is a slice of strings with each
// string representing one field.
//
// Lines that produce no record (blank lines and comment lines) are skipped.
// An error that arrives together with a record, such as io.EOF after a
// final line without a newline, is dropped so the record is delivered.
// When the field count is being enforced and does not match, the record
// is returned together with an ErrFieldCount ParseError.
func (r *Reader) Read() (record []string, err error) {
	// Keep parsing until a line actually yields fields.
	for record == nil {
		record, err = r.parseRecord()
		if record == nil && err != nil {
			return nil, err
		}
	}

	switch want := r.FieldsPerRecord; {
	case want == 0:
		// First record seen: its width becomes the required width.
		r.FieldsPerRecord = len(record)
	case want > 0 && len(record) != want:
		r.column = 0 // report at start of record
		return record, r.error(ErrFieldCount)
	}
	return record, nil
}

// ReadAll reads all the remaining records from r.
// Each record is a slice of fields.
// A successful call returns err == nil, not err == EOF. Because ReadAll is
// defined to read until EOF, it does not treat end of file as an error to be
// reported.  On any other error the records gathered so far are discarded
// and nil is returned with that error.
func (r *Reader) ReadAll() (records [][]string, err error) {
	for {
		var rec []string
		rec, err = r.Read()
		if err != nil {
			break
		}
		records = append(records, rec)
	}
	if err == io.EOF {
		return records, nil
	}
	return nil, err
}

// readRune returns the next rune of input, collapsing the pair \r\n into a
// single \n, and advances r.column by one so that it indexes the rune just
// returned.
//
// A \r that is followed by some other rune is returned as \r and the other
// rune is pushed back.  If the read following a \r fails, the result of
// that failed read is returned and the \r itself is not delivered.
func (r *Reader) readRune() (rune, error) {
	ch, _, err := r.r.ReadRune()

	// Simplifying assumption: every \r directly followed by \n is a line
	// ending.  Files mixing \r\n and bare \n are not detected.
	if ch == '\r' {
		ch, _, err = r.r.ReadRune()
		if err == nil && ch != '\n' {
			r.r.UnreadRune()
			ch = '\r'
		}
	}

	r.column++
	return ch, err
}

// unreadRune pushes the most recently read rune back onto the input and
// steps r.column back by one to match.
func (r *Reader) unreadRune() {
	r.column--
	r.r.UnreadRune()
}

// skip consumes input up to and including the first occurrence of delim.
// It returns nil once delim has been consumed, or the read error that
// stopped it first.
func (r *Reader) skip(delim rune) error {
	ch, err := r.readRune()
	for err == nil && ch != delim {
		ch, err = r.readRune()
	}
	return err
}

// parseRecord reads and parses a single csv record from r.
//
// It returns nil fields with a nil error for a line that holds no record
// (a comment line or a blank line).  When the input ends right after the
// last field, the fields are returned together with io.EOF.  Any other
// error discards the fields collected so far.
func (r *Reader) parseRecord() (fields []string, err error) {
	// A record always begins on a fresh line.  Lines are numbered from 1,
	// and column starts at -1 so that the increment in readRune makes it
	// index the rune that was just read.
	r.line++
	r.column = -1

	// Peek at the first rune.  A read error ends the record immediately.
	// If comments are enabled and the line starts with the comment
	// character, the rest of the line is discarded.
	first, _, err := r.r.ReadRune()
	if err != nil {
		return nil, err
	}
	if r.Comment != 0 && first == r.Comment {
		return nil, r.skip('\n')
	}
	r.r.UnreadRune()

	// There is at least one field from here on.  Collect fields until the
	// line ends or a read fails.
	var (
		have  bool
		delim rune
	)
	for {
		have, delim, err = r.parseField()
		if have {
			fields = append(fields, r.field.String())
		}
		if delim == '\n' || err != nil {
			break
		}
	}

	if delim == '\n' || err == io.EOF {
		return fields, err
	}
	return nil, err
}

// parseField parses the next field in the record.  The read field is
// located in r.field, which is reset at the start of every call.
//
// haveField reports whether r.field holds a field that belongs to the
// record.  delim is '\n' when the field was terminated by the end of the
// line and 0 whenever a non-nil error is returned.  In the remaining case
// it is r.Comma when TrailingComma is set and otherwise the rune that was
// peeked after the delimiter, so callers should only compare it with '\n'.
//
// err is io.EOF when the input ended (possibly together with a final
// field), a *ParseError for malformed input, or the underlying read error.
func (r *Reader) parseField() (haveField bool, delim rune, err error) {
	r.field.Reset()

	r1, err := r.readRune()
	if err != nil {
		// If we have EOF and are not at the start of a line
		// then we return the empty field.  We have already
		// checked for trailing commas if needed.
		if err == io.EOF && r.column != 0 {
			return true, 0, err
		}
		return false, 0, err
	}

	if r.TrimLeadingSpace {
		// Skip white space in front of the field.  A newline is never
		// skipped here because it terminates the record.  A read error
		// while skipping means no field is reported.
		for r1 != '\n' && unicode.IsSpace(r1) {
			r1, err = r.readRune()
			if err != nil {
				return false, 0, err
			}
		}
	}

	switch r1 {
	case r.Comma:
		// Empty field directly followed by the delimiter.
		// The trailing-comma check is done below.

	case '\n':
		// We are a trailing empty field or a blank line
		if r.column == 0 {
			return false, r1, nil
		}
		return true, r1, nil

	case '"':
		// quoted field
	Quoted:
		for {
			r1, err = r.readRune()
			if err != nil {
				if err == io.EOF {
					// Input ended inside the quotes: accepted as the end of
					// the field with LazyQuotes, a parse error otherwise.
					if r.LazyQuotes {
						return true, 0, err
					}
					return false, 0, r.error(ErrQuote)
				}
				return false, 0, err
			}
			switch r1 {
			case '"':
				// A quote inside the field: look at the rune after it to
				// decide whether it closes the field or is escaped.
				r1, err = r.readRune()
				if err != nil || r1 == r.Comma {
					// Closing quote followed by the delimiter or by a read
					// error; both are handled after the outer switch.
					break Quoted
				}
				if r1 == '\n' {
					// Closing quote at the end of the line.
					return true, r1, nil
				}
				if r1 != '"' {
					if !r.LazyQuotes {
						// Step back so the error points at the quote, not at
						// the rune that follows it.
						r.column--
						return false, 0, r.error(ErrQuote)
					}
					// accept the bare quote
					r.field.WriteRune('"')
				}
				// For a doubled quote r1 is '"' here, so the write below
				// stores a single quote character.
			case '\n':
				// A newline inside quotes is part of the field, but the
				// position bookkeeping moves on to the next line.
				r.line++
				r.column = -1
			}
			r.field.WriteRune(r1)
		}

	default:
		// unquoted field
		for {
			r.field.WriteRune(r1)
			r1, err = r.readRune()
			if err != nil || r1 == r.Comma {
				break
			}
			if r1 == '\n' {
				return true, r1, nil
			}
			if !r.LazyQuotes && r1 == '"' {
				return false, 0, r.error(ErrBareQuote)
			}
		}
	}

	// Reached when the field ended at the delimiter or a read failed after
	// the field's content.  At EOF the field collected so far still counts.
	if err != nil {
		if err == io.EOF {
			return true, 0, err
		}
		return false, 0, err
	}

	if !r.TrailingComma {
		// We don't allow trailing commas.  See if we
		// are at the end of the line (being mindful
		// of trimming spaces).
		c := r.column // column of the delimiter, used for error reporting
		r1, err = r.readRune()
		if r.TrimLeadingSpace {
			for r1 != '\n' && unicode.IsSpace(r1) {
				r1, err = r.readRune()
				if err != nil {
					break
				}
			}
		}
		if err == io.EOF || r1 == '\n' {
			r.column = c // report the comma
			return false, 0, r.error(ErrTrailingComma)
		}
		// NOTE(review): only the most recently read rune is pushed back.
		// Spaces skipped by the loop above stay consumed, which looks
		// harmless because the next call trims leading space again.  If
		// readRune already pushed a rune back itself (a \r not followed by
		// \n), this second unread presumably has no effect on the
		// underlying bufio.Reader; confirm.  A non-EOF read error from the
		// peek is not returned here.
		r.unreadRune()
	}
	return true, r1, nil
}
add gcc 4.70 2012-03-27 23:13:14 +00:00			`// Copyright 2011 The Go Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE file.`

			`// Package csv reads and writes comma-separated values (CSV) files.`
			`//`
			`// A csv file contains zero or more records of one or more fields per record.`
			`// Each record is separated by the newline character. The final record may`
			`// optionally be followed by a newline character.`
			`//`
			`// field1,field2,field3`
			`//`
			`// White space is considered part of a field.`
			`//`
			`// Carriage returns before newline characters are silently removed.`
			`//`
			`// Blank lines are ignored. A line with only whitespace characters (excluding`
			`// the ending newline character) is not considered a blank line.`
			`//`
			`// Fields which start and stop with the quote character " are called`
			`// quoted-fields. The beginning and ending quote are not part of the`
			`// field.`
			`//`
			`// The source:`
			`//`
			`// normal string,"quoted-field"`
			`//`
			`// results in the fields`
			`//`
			// {`normal string`, `quoted-field`}
			`//`
			`// Within a quoted-field a quote character followed by a second quote`
			`// character is considered a single quote.`
			`//`
			`// "the ""word"" is true","a ""quoted-field"""`
			`//`
			`// results in`
			`//`
			// {`the "word" is true`, `a "quoted-field"`}
			`//`
			`// Newlines and commas may be included in a quoted-field`
			`//`
			`// "Multi-line`
			`// field","comma is ,"`
			`//`
			`// results in`
			`//`
			// {`Multi-line
			// field`, `comma is ,`}
			`package csv`

			`import (`
			`"bufio"`
			`"bytes"`
			`"errors"`
			`"fmt"`
			`"io"`
			`"unicode"`
			`)`

			`// A ParseError is returned for parsing errors.`
			`// The first line is 1. The first column is 0.`
			`type ParseError struct {`
			`Line int // Line where the error occurred`
			`Column int // Column (rune index) where the error occurred`
			`Err error // The actual error`
			`}`

			`func (e *ParseError) Error() string {`
			`return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Err)`
			`}`

			`// These are the errors that can be returned in ParseError.Error`
			`var (`
			`ErrTrailingComma = errors.New("extra delimiter at end of line")`
			`ErrBareQuote = errors.New("bare \" in non-quoted-field")`
			`ErrQuote = errors.New("extraneous \" in field")`
			`ErrFieldCount = errors.New("wrong number of fields in line")`
			`)`

			`// A Reader reads records from a CSV-encoded file.`
			`//`
			`// As returned by NewReader, a Reader expects input conforming to RFC 4180.`
			`// The exported fields can be changed to customize the details before the`
			`// first call to Read or ReadAll.`
			`//`
			`// Comma is the field delimiter. It defaults to ','.`
			`//`
			`// Comment, if not 0, is the comment character. Lines beginning with the`
			`// Comment character are ignored.`
			`//`
			`// If FieldsPerRecord is positive, Read requires each record to`
			`// have the given number of fields. If FieldsPerRecord is 0, Read sets it to`
			`// the number of fields in the first record, so that future records must`
			`// have the same field count.`
			`//`
			`// If LazyQuotes is true, a quote may appear in an unquoted field and a`
			`// non-doubled quote may appear in a quoted field.`
			`//`
			`// If TrailingComma is true, the last field may be an unquoted empty field.`
			`//`
			`// If TrimLeadingSpace is true, leading white space in a field is ignored.`
			`type Reader struct {`
			`Comma rune // Field delimiter (set to ',' by NewReader)`
			`Comment rune // Comment character for start of line`
			`FieldsPerRecord int // Number of expected fields per record`
			`LazyQuotes bool // Allow lazy quotes`
			`TrailingComma bool // Allow trailing comma`
			`TrimLeadingSpace bool // Trim leading space`
			`line int`
			`column int`
			`r *bufio.Reader`
			`field bytes.Buffer`
			`}`

			`// NewReader returns a new Reader that reads from r.`
			`func NewReader(r io.Reader) *Reader {`
			`return &Reader{`
			`Comma: ',',`
			`r: bufio.NewReader(r),`
			`}`
			`}`

			`// error creates a new ParseError based on err.`
			`func (r *Reader) error(err error) error {`
			`return &ParseError{`
			`Line: r.line,`
			`Column: r.column,`
			`Err: err,`
			`}`
			`}`

			`// Read reads one record from r. The record is a slice of strings with each`
			`// string representing one field.`
			`func (r *Reader) Read() (record []string, err error) {`
			`for {`
			`record, err = r.parseRecord()`
			`if record != nil {`
			`break`
			`}`
			`if err != nil {`
			`return nil, err`
			`}`
			`}`

			`if r.FieldsPerRecord > 0 {`
			`if len(record) != r.FieldsPerRecord {`
			`r.column = 0 // report at start of record`
			`return record, r.error(ErrFieldCount)`
			`}`
			`} else if r.FieldsPerRecord == 0 {`
			`r.FieldsPerRecord = len(record)`
			`}`
			`return record, nil`
			`}`

			`// ReadAll reads all the remaining records from r.`
			`// Each record is a slice of fields.`
			`// A successful call returns err == nil, not err == EOF. Because ReadAll is`
			`// defined to read until EOF, it does not treat end of file as an error to be`
			`// reported.`
			`func (r *Reader) ReadAll() (records [][]string, err error) {`
			`for {`
			`record, err := r.Read()`
			`if err == io.EOF {`
			`return records, nil`
			`}`
			`if err != nil {`
			`return nil, err`
			`}`
			`records = append(records, record)`
			`}`
			`panic("unreachable")`
			`}`

			`// readRune reads one rune from r, folding \r\n to \n and keeping track`
			`// of how far into the line we have read. r.column will point to the start`
			`// of this rune, not the end of this rune.`
			`func (r *Reader) readRune() (rune, error) {`
			`r1, _, err := r.r.ReadRune()`

			`// Handle \r\n here. We make the simplifying assumption that`
			`// anytime \r is followed by \n that it can be folded to \n.`
			`// We will not detect files which contain both \r\n and bare \n.`
			`if r1 == '\r' {`
			`r1, _, err = r.r.ReadRune()`
			`if err == nil {`
			`if r1 != '\n' {`
			`r.r.UnreadRune()`
			`r1 = '\r'`
			`}`
			`}`
			`}`
			`r.column++`
			`return r1, err`
			`}`

			`// unreadRune puts the last rune read from r back.`
			`func (r *Reader) unreadRune() {`
			`r.r.UnreadRune()`
			`r.column--`
			`}`

			`// skip reads runes up to and including the rune delim or until error.`
			`func (r *Reader) skip(delim rune) error {`
			`for {`
			`r1, err := r.readRune()`
			`if err != nil {`
			`return err`
			`}`
			`if r1 == delim {`
			`return nil`
			`}`
			`}`
			`panic("unreachable")`
			`}`

			`// parseRecord reads and parses a single csv record from r.`
			`func (r *Reader) parseRecord() (fields []string, err error) {`
			`// Each record starts on a new line. We increment our line`
			`// number (lines start at 1, not 0) and set column to -1`
			`// so as we increment in readRune it points to the character we read.`
			`r.line++`
			`r.column = -1`

			`// Peek at the first rune. If it is an error we are done.`
			`// If we are support comments and it is the comment character`
			`// then skip to the end of line.`

			`r1, _, err := r.r.ReadRune()`
			`if err != nil {`
			`return nil, err`
			`}`

			`if r.Comment != 0 && r1 == r.Comment {`
			`return nil, r.skip('\n')`
			`}`
			`r.r.UnreadRune()`

			`// At this point we have at least one field.`
			`for {`
			`haveField, delim, err := r.parseField()`
			`if haveField {`
			`fields = append(fields, r.field.String())`
			`}`
			`if delim == '\n' \|\| err == io.EOF {`
			`return fields, err`
			`} else if err != nil {`
			`return nil, err`
			`}`
			`}`
			`panic("unreachable")`
			`}`

			`// parseField parses the next field in the record. The read field is`
			`// located in r.field. Delim is the first character not part of the field`
			`// (r.Comma or '\n').`
			`func (r *Reader) parseField() (haveField bool, delim rune, err error) {`
			`r.field.Reset()`

			`r1, err := r.readRune()`
			`if err != nil {`
			`// If we have EOF and are not at the start of a line`
			`// then we return the empty field. We have already`
			`// checked for trailing commas if needed.`
			`if err == io.EOF && r.column != 0 {`
			`return true, 0, err`
			`}`
			`return false, 0, err`
			`}`

			`if r.TrimLeadingSpace {`
			`for r1 != '\n' && unicode.IsSpace(r1) {`
			`r1, err = r.readRune()`
			`if err != nil {`
			`return false, 0, err`
			`}`
			`}`
			`}`

			`switch r1 {`
			`case r.Comma:`
			`// will check below`

			`case '\n':`
			`// We are a trailing empty field or a blank line`
			`if r.column == 0 {`
			`return false, r1, nil`
			`}`
			`return true, r1, nil`

			`case '"':`
			`// quoted field`
			`Quoted:`
			`for {`
			`r1, err = r.readRune()`
			`if err != nil {`
			`if err == io.EOF {`
			`if r.LazyQuotes {`
			`return true, 0, err`
			`}`
			`return false, 0, r.error(ErrQuote)`
			`}`
			`return false, 0, err`
			`}`
			`switch r1 {`
			`case '"':`
			`r1, err = r.readRune()`
			`if err != nil \|\| r1 == r.Comma {`
			`break Quoted`
			`}`
			`if r1 == '\n' {`
			`return true, r1, nil`
			`}`
			`if r1 != '"' {`
			`if !r.LazyQuotes {`
			`r.column--`
			`return false, 0, r.error(ErrQuote)`
			`}`
			`// accept the bare quote`
			`r.field.WriteRune('"')`
			`}`
			`case '\n':`
			`r.line++`
			`r.column = -1`
			`}`
			`r.field.WriteRune(r1)`
			`}`

			`default:`
			`// unquoted field`
			`for {`
			`r.field.WriteRune(r1)`
			`r1, err = r.readRune()`
			`if err != nil \|\| r1 == r.Comma {`
			`break`
			`}`
			`if r1 == '\n' {`
			`return true, r1, nil`
			`}`
			`if !r.LazyQuotes && r1 == '"' {`
			`return false, 0, r.error(ErrBareQuote)`
			`}`
			`}`
			`}`

			`if err != nil {`
			`if err == io.EOF {`
			`return true, 0, err`
			`}`
			`return false, 0, err`
			`}`

			`if !r.TrailingComma {`
			`// We don't allow trailing commas. See if we`
			`// are at the end of the line (being mindful`
			`// of trimming spaces).`
			`c := r.column`
			`r1, err = r.readRune()`
			`if r.TrimLeadingSpace {`
			`for r1 != '\n' && unicode.IsSpace(r1) {`
			`r1, err = r.readRune()`
			`if err != nil {`
			`break`
			`}`
			`}`
			`}`
			`if err == io.EOF \|\| r1 == '\n' {`
			`r.column = c // report the comma`
			`return false, 0, r.error(ErrTrailingComma)`
			`}`
			`r.unreadRune()`
			`}`
			`return true, r1, nil`
			`}`