| // Copyright 2009 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package xml |
| |
| import ( |
| "bytes" |
| "fmt" |
| "io" |
| "reflect" |
| "strings" |
| "testing" |
| "unicode/utf8" |
| ) |
| |
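| // toks is a test TokenReader that replays a fixed slice of tokens; with |
| // earlyEOF set it returns io.EOF together with the final token. |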
| type toks struct { |
| earlyEOF bool |
| t []Token |
| } |
| |
| func (t *toks) Token() (Token, error) { |
| if len(t.t) == 0 { |
| return nil, io.EOF |
| } |
| var tok Token |
| tok, t.t = t.t[0], t.t[1:] |
| if t.earlyEOF && len(t.t) == 0 { |
| return tok, io.EOF |
| } |
| return tok, nil |
| } |
| |
| func TestDecodeEOF(t *testing.T) { |
| start := StartElement{Name: Name{Local: "test"}} |
| tests := []struct { |
| name string |
| tokens []Token |
| ok bool |
| }{ |
| { |
| name: "OK", |
| tokens: []Token{ |
| start, |
| start.End(), |
| }, |
| ok: true, |
| }, |
| { |
| name: "Malformed", |
| tokens: []Token{ |
| start, |
| StartElement{Name: Name{Local: "bad"}}, |
| start.End(), |
| }, |
| ok: false, |
| }, |
| } |
| for _, tc := range tests { |
| for _, eof := range []bool{true, false} { |
| name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof) |
| t.Run(name, func(t *testing.T) { |
| d := NewTokenDecoder(&toks{ |
| earlyEOF: eof, |
| t: tc.tokens, |
| }) |
| err := d.Decode(&struct { |
| XMLName Name `xml:"test"` |
| }{}) |
| if tc.ok && err != nil { |
| t.Fatalf("d.Decode: expected nil error, got %v", err) |
| } |
| if _, ok := err.(*SyntaxError); !tc.ok && !ok { |
| t.Errorf("d.Decode: expected syntax error, got %v", err) |
| } |
| }) |
| } |
| } |
| } |
| |
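| // toksNil is a test TokenReader that, once its tokens are exhausted, returns |
| // a nil token with a nil error before finally reporting io.EOF. |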
| type toksNil struct { |
| returnEOF bool |
| t []Token |
| } |
| |
| func (t *toksNil) Token() (Token, error) { |
| if len(t.t) == 0 { |
| if !t.returnEOF { |
| // Return nil, nil before returning an EOF. It's legal, but |
| // discouraged. |
| t.returnEOF = true |
| return nil, nil |
| } |
| return nil, io.EOF |
| } |
| var tok Token |
| tok, t.t = t.t[0], t.t[1:] |
| return tok, nil |
| } |
| |
| func TestDecodeNilToken(t *testing.T) { |
| for _, strict := range []bool{true, false} { |
| name := fmt.Sprintf("Strict=%v", strict) |
| t.Run(name, func(t *testing.T) { |
| start := StartElement{Name: Name{Local: "test"}} |
| bad := StartElement{Name: Name{Local: "bad"}} |
| d := NewTokenDecoder(&toksNil{ |
| // Malformed |
| t: []Token{start, bad, start.End()}, |
| }) |
| d.Strict = strict |
| err := d.Decode(&struct { |
| XMLName Name `xml:"test"` |
| }{}) |
| if _, ok := err.(*SyntaxError); !ok { |
| t.Errorf("d.Decode: expected syntax error, got %v", err) |
| } |
| }) |
| } |
| } |
| |
| const testInput = ` |
| <?xml version="1.0" encoding="UTF-8"?> |
| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
| "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + |
| "\r\n\t" + ` > |
| <hello lang="en">World <>'" 白鵬翔</hello> |
| <query>&何; &is-it;</query> |
| <goodbye /> |
| <outer foo:attr="value" xmlns:tag="ns4"> |
| <inner/> |
| </outer> |
| <tag:name> |
| <![CDATA[Some text here.]]> |
| </tag:name> |
| </body><!-- missing final newline -->` |
| |
| var testEntity = map[string]string{"何": "What", "is-it": "is it?"} |
| |
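| // rawTokens is the token stream RawToken should produce for testInput, |
| // with namespace prefixes left unresolved. |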
| var rawTokens = []Token{ |
| CharData("\n"), |
| ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, |
| CharData("\n"), |
| Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
| "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), |
| CharData("\n"), |
| StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, |
| CharData("\n "), |
| StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, |
| CharData("World <>'\" 白鵬翔"), |
| EndElement{Name{"", "hello"}}, |
| CharData("\n "), |
| StartElement{Name{"", "query"}, []Attr{}}, |
| CharData("What is it?"), |
| EndElement{Name{"", "query"}}, |
| CharData("\n "), |
| StartElement{Name{"", "goodbye"}, []Attr{}}, |
| EndElement{Name{"", "goodbye"}}, |
| CharData("\n "), |
| StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, |
| CharData("\n "), |
| StartElement{Name{"", "inner"}, []Attr{}}, |
| EndElement{Name{"", "inner"}}, |
| CharData("\n "), |
| EndElement{Name{"", "outer"}}, |
| CharData("\n "), |
| StartElement{Name{"tag", "name"}, []Attr{}}, |
| CharData("\n "), |
| CharData("Some text here."), |
| CharData("\n "), |
| EndElement{Name{"tag", "name"}}, |
| CharData("\n"), |
| EndElement{Name{"", "body"}}, |
| Comment(" missing final newline "), |
| } |
| |
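| // cookedTokens is the token stream Token should produce for testInput, |
| // with namespace prefixes resolved to their namespace URLs. |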
| var cookedTokens = []Token{ |
| CharData("\n"), |
| ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, |
| CharData("\n"), |
| Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
| "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), |
| CharData("\n"), |
| StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, |
| CharData("\n "), |
| StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, |
| CharData("World <>'\" 白鵬翔"), |
| EndElement{Name{"ns2", "hello"}}, |
| CharData("\n "), |
| StartElement{Name{"ns2", "query"}, []Attr{}}, |
| CharData("What is it?"), |
| EndElement{Name{"ns2", "query"}}, |
| CharData("\n "), |
| StartElement{Name{"ns2", "goodbye"}, []Attr{}}, |
| EndElement{Name{"ns2", "goodbye"}}, |
| CharData("\n "), |
| StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, |
| CharData("\n "), |
| StartElement{Name{"ns2", "inner"}, []Attr{}}, |
| EndElement{Name{"ns2", "inner"}}, |
| CharData("\n "), |
| EndElement{Name{"ns2", "outer"}}, |
| CharData("\n "), |
| StartElement{Name{"ns3", "name"}, []Attr{}}, |
| CharData("\n "), |
| CharData("Some text here."), |
| CharData("\n "), |
| EndElement{Name{"ns3", "name"}}, |
| CharData("\n"), |
| EndElement{Name{"ns2", "body"}}, |
| Comment(" missing final newline "), |
| } |
| |
| const testInputAltEncoding = ` |
| <?xml version="1.0" encoding="x-testing-uppercase"?> |
| <TAG>VALUE</TAG>` |
| |
| var rawTokensAltEncoding = []Token{ |
| CharData("\n"), |
| ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, |
| CharData("\n"), |
| StartElement{Name{"", "tag"}, []Attr{}}, |
| CharData("value"), |
| EndElement{Name{"", "tag"}}, |
| } |
| |
| var xmlInput = []string{ |
| // unexpected EOF cases |
| "<", |
| "<t", |
| "<t ", |
| "<t/", |
| "<!", |
| "<!-", |
| "<!--", |
| "<!--c-", |
| "<!--c--", |
| "<!d", |
| "<t></", |
| "<t></t", |
| "<?", |
| "<?p", |
| "<t a", |
| "<t a=", |
| "<t a='", |
| "<t a=''", |
| "<t/><![", |
| "<t/><![C", |
| "<t/><![CDATA[d", |
| "<t/><![CDATA[d]", |
| "<t/><![CDATA[d]]", |
| |
| // other syntax errors |
| "<>", |
| "<t/a", |
| "<0 />", |
| "<?0 >", |
| // "<!0 >", // let the Token() caller handle |
| "</0>", |
| "<t 0=''>", |
| "<t a='&'>", |
| "<t a='<'>", |
| "<t> c;</t>", |
| "<t a>", |
| "<t a=>", |
| "<t a=v>", |
| // "<![CDATA[d]]>", // let the Token() caller handle |
| "<t></e>", |
| "<t></>", |
| "<t></t!", |
| "<t>cdata]]></t>", |
| } |
| |
| func TestRawToken(t *testing.T) { |
| d := NewDecoder(strings.NewReader(testInput)) |
| d.Entity = testEntity |
| testRawToken(t, d, testInput, rawTokens) |
| } |
| |
| const nonStrictInput = ` |
| <tag>non&entity</tag> |
| <tag>&unknown;entity</tag> |
| <tag>{</tag> |
| <tag>&#zzz;</tag> |
| <tag>&なまえ3;</tag> |
| <tag><-gt;</tag> |
| <tag>&;</tag> |
| <tag>&0a;</tag> |
| ` |
| |
| var nonStrictTokens = []Token{ |
| CharData("\n"), |
| StartElement{Name{"", "tag"}, []Attr{}}, |
| CharData("non&entity"), |
| EndElement{Name{"", "tag"}}, |
| CharData("\n"), |
| StartElement{Name{"", "tag"}, []Attr{}}, |
| CharData("&unknown;entity"), |
| EndElement{Name{"", "tag"}}, |
| CharData("\n"), |
| StartElement{Name{"", "tag"}, []Attr{}}, |
| CharData("{"), |
| EndElement{Name{"", "tag"}}, |
| CharData("\n"), |
| StartElement{Name{"", "tag"}, []Attr{}}, |
| CharData("&#zzz;"), |
| EndElement{Name{"", "tag"}}, |
| CharData("\n"), |
| StartElement{Name{"", "tag"}, []Attr{}}, |
| CharData("&なまえ3;"), |
| EndElement{Name{"", "tag"}}, |
| CharData("\n"), |
| StartElement{Name{"", "tag"}, []Attr{}}, |
| CharData("<-gt;"), |
| EndElement{Name{"", "tag"}}, |
| CharData("\n"), |
| StartElement{Name{"", "tag"}, []Attr{}}, |
| CharData("&;"), |
| EndElement{Name{"", "tag"}}, |
| CharData("\n"), |
| StartElement{Name{"", "tag"}, []Attr{}}, |
| CharData("&0a;"), |
| EndElement{Name{"", "tag"}}, |
| CharData("\n"), |
| } |
| |
| func TestNonStrictRawToken(t *testing.T) { |
| d := NewDecoder(strings.NewReader(nonStrictInput)) |
| d.Strict = false |
| testRawToken(t, d, nonStrictInput, nonStrictTokens) |
| } |
| |
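| // downCaser lower-cases ASCII bytes read from the underlying reader; it acts |
| // as a trivial CharsetReader for the x-testing-uppercase tests. |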
| type downCaser struct { |
| t *testing.T |
| r io.ByteReader |
| } |
| |
| func (d *downCaser) ReadByte() (c byte, err error) { |
| c, err = d.r.ReadByte() |
| if c >= 'A' && c <= 'Z' { |
| c += 'a' - 'A' |
| } |
| return |
| } |
| |
| func (d *downCaser) Read(p []byte) (int, error) { |
| d.t.Fatalf("unexpected Read call on downCaser reader") |
| panic("unreachable") |
| } |
| |
| func TestRawTokenAltEncoding(t *testing.T) { |
| d := NewDecoder(strings.NewReader(testInputAltEncoding)) |
| d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { |
| if charset != "x-testing-uppercase" { |
| t.Fatalf("unexpected charset %q", charset) |
| } |
| return &downCaser{t, input.(io.ByteReader)}, nil |
| } |
| testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) |
| } |
| |
| func TestRawTokenAltEncodingNoConverter(t *testing.T) { |
| d := NewDecoder(strings.NewReader(testInputAltEncoding)) |
| token, err := d.RawToken() |
| if token == nil { |
| t.Fatalf("expected a token on first RawToken call") |
| } |
| if err != nil { |
| t.Fatal(err) |
| } |
| token, err = d.RawToken() |
| if token != nil { |
| t.Errorf("expected a nil token; got %#v", token) |
| } |
| if err == nil { |
| t.Fatalf("expected an error on second RawToken call") |
| } |
| const encoding = "x-testing-uppercase" |
| if !strings.Contains(err.Error(), encoding) { |
| t.Errorf("expected error to contain %q; got error: %v", |
| encoding, err) |
| } |
| } |
| |
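| // testRawToken reads tokens from d with RawToken, compares them against |
| // rawTokens, and checks that InputOffset brackets the raw text of each token. |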
| func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { |
| lastEnd := int64(0) |
| for i, want := range rawTokens { |
| start := d.InputOffset() |
| have, err := d.RawToken() |
| end := d.InputOffset() |
| if err != nil { |
| t.Fatalf("token %d: unexpected error: %s", i, err) |
| } |
| if !reflect.DeepEqual(have, want) { |
| var shave, swant string |
| if _, ok := have.(CharData); ok { |
| shave = fmt.Sprintf("CharData(%q)", have) |
| } else { |
| shave = fmt.Sprintf("%#v", have) |
| } |
| if _, ok := want.(CharData); ok { |
| swant = fmt.Sprintf("CharData(%q)", want) |
| } else { |
| swant = fmt.Sprintf("%#v", want) |
| } |
| t.Errorf("token %d = %s, want %s", i, shave, swant) |
| } |
| |
| // Check that InputOffset reports offsets that bracket the token just returned. |
| switch { |
| case start < lastEnd: |
| t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) |
| case start >= end: |
| // Special case: EndElement can be synthesized. |
| if start == end && end == lastEnd { |
| break |
| } |
| t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) |
| case end > int64(len(raw)): |
| t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) |
| default: |
| text := raw[start:end] |
| if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { |
| t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) |
| } |
| } |
| lastEnd = end |
| } |
| } |
| |
| // Ensure that directives (specifically !DOCTYPE) include the complete |
| // text of any nested directives, noting that < and > do not change |
| // nesting depth if they are in single or double quotes. |
| |
| var nestedDirectivesInput = ` |
| <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> |
| <!DOCTYPE [<!ENTITY xlt ">">]> |
| <!DOCTYPE [<!ENTITY xlt "<">]> |
| <!DOCTYPE [<!ENTITY xlt '>'>]> |
| <!DOCTYPE [<!ENTITY xlt '<'>]> |
| <!DOCTYPE [<!ENTITY xlt '">'>]> |
| <!DOCTYPE [<!ENTITY xlt "'<">]> |
| ` |
| |
| var nestedDirectivesTokens = []Token{ |
| CharData("\n"), |
| Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), |
| CharData("\n"), |
| Directive(`DOCTYPE [<!ENTITY xlt ">">]`), |
| CharData("\n"), |
| Directive(`DOCTYPE [<!ENTITY xlt "<">]`), |
| CharData("\n"), |
| Directive(`DOCTYPE [<!ENTITY xlt '>'>]`), |
| CharData("\n"), |
| Directive(`DOCTYPE [<!ENTITY xlt '<'>]`), |
| CharData("\n"), |
| Directive(`DOCTYPE [<!ENTITY xlt '">'>]`), |
| CharData("\n"), |
| Directive(`DOCTYPE [<!ENTITY xlt "'<">]`), |
| CharData("\n"), |
| } |
| |
| func TestNestedDirectives(t *testing.T) { |
| d := NewDecoder(strings.NewReader(nestedDirectivesInput)) |
| |
| for i, want := range nestedDirectivesTokens { |
| have, err := d.Token() |
| if err != nil { |
| t.Fatalf("token %d: unexpected error: %s", i, err) |
| } |
| if !reflect.DeepEqual(have, want) { |
| t.Errorf("token %d = %#v want %#v", i, have, want) |
| } |
| } |
| } |
| |
| func TestToken(t *testing.T) { |
| d := NewDecoder(strings.NewReader(testInput)) |
| d.Entity = testEntity |
| |
| for i, want := range cookedTokens { |
| have, err := d.Token() |
| if err != nil { |
| t.Fatalf("token %d: unexpected error: %s", i, err) |
| } |
| if !reflect.DeepEqual(have, want) { |
| t.Errorf("token %d = %#v want %#v", i, have, want) |
| } |
| } |
| } |
| |
| func TestSyntax(t *testing.T) { |
| for i := range xmlInput { |
| d := NewDecoder(strings.NewReader(xmlInput[i])) |
| var err error |
| for _, err = d.Token(); err == nil; _, err = d.Token() { |
| } |
| if _, ok := err.(*SyntaxError); !ok { |
| t.Fatalf(`xmlInput "%s": expected SyntaxError, got %v`, xmlInput[i], err) |
| } |
| } |
| } |
| |
| func TestInputLinePos(t *testing.T) { |
| testInput := `<root> |
| <?pi |
| ?> <elt |
| att |
| = |
| "val"> |
| <![CDATA[ |
| ]]><!-- |
| |
| --></elt> |
| </root>` |
| linePos := [][]int{ |
| {1, 7}, |
| {2, 1}, |
| {3, 4}, |
| {3, 6}, |
| {6, 7}, |
| {7, 1}, |
| {8, 4}, |
| {10, 4}, |
| {10, 10}, |
| {11, 1}, |
| {11, 8}, |
| } |
| dec := NewDecoder(strings.NewReader(testInput)) |
| for _, want := range linePos { |
| if _, err := dec.Token(); err != nil { |
| t.Errorf("Unexpected error: %v", err) |
| continue |
| } |
| |
| gotLine, gotCol := dec.InputPos() |
| if gotLine != want[0] || gotCol != want[1] { |
| t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1]) |
| } |
| } |
| } |
| |
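| // allScalars covers every scalar kind that Unmarshal supports. |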
| type allScalars struct { |
| True1 bool |
| True2 bool |
| False1 bool |
| False2 bool |
| Int int |
| Int8 int8 |
| Int16 int16 |
| Int32 int32 |
| Int64 int64 |
| Uint int |
| Uint8 uint8 |
| Uint16 uint16 |
| Uint32 uint32 |
| Uint64 uint64 |
| Uintptr uintptr |
| Float32 float32 |
| Float64 float64 |
| String string |
| PtrString *string |
| } |
| |
| var all = allScalars{ |
| True1: true, |
| True2: true, |
| False1: false, |
| False2: false, |
| Int: 1, |
| Int8: -2, |
| Int16: 3, |
| Int32: -4, |
| Int64: 5, |
| Uint: 6, |
| Uint8: 7, |
| Uint16: 8, |
| Uint32: 9, |
| Uint64: 10, |
| Uintptr: 11, |
| Float32: 13.0, |
| Float64: 14.0, |
| String: "15", |
| PtrString: &sixteen, |
| } |
| |
| var sixteen = "16" |
| |
| const testScalarsInput = `<allscalars> |
| <True1>true</True1> |
| <True2>1</True2> |
| <False1>false</False1> |
| <False2>0</False2> |
| <Int>1</Int> |
| <Int8>-2</Int8> |
| <Int16>3</Int16> |
| <Int32>-4</Int32> |
| <Int64>5</Int64> |
| <Uint>6</Uint> |
| <Uint8>7</Uint8> |
| <Uint16>8</Uint16> |
| <Uint32>9</Uint32> |
| <Uint64>10</Uint64> |
| <Uintptr>11</Uintptr> |
| <Float>12.0</Float> |
| <Float32>13.0</Float32> |
| <Float64>14.0</Float64> |
| <String>15</String> |
| <PtrString>16</PtrString> |
| </allscalars>` |
| |
| func TestAllScalars(t *testing.T) { |
| var a allScalars |
| err := Unmarshal([]byte(testScalarsInput), &a) |
| |
| if err != nil { |
| t.Fatal(err) |
| } |
| if !reflect.DeepEqual(a, all) { |
| t.Errorf("have %+v want %+v", a, all) |
| } |
| } |
| |
| type item struct { |
| FieldA string |
| } |
| |
| func TestIssue569(t *testing.T) { |
| data := `<item><FieldA>abcd</FieldA></item>` |
| var i item |
| err := Unmarshal([]byte(data), &i) |
| |
| if err != nil || i.FieldA != "abcd" { |
| t.Fatalf("Unmarshal: err = %v, FieldA = %q; want nil error and FieldA %q", err, i.FieldA, "abcd") |
| } |
| } |
| |
| func TestUnquotedAttrs(t *testing.T) { |
| data := "<tag attr=azAZ09:-_\t>" |
| d := NewDecoder(strings.NewReader(data)) |
| d.Strict = false |
| token, err := d.Token() |
| if _, ok := err.(*SyntaxError); ok { |
| t.Errorf("Unexpected error: %v", err) |
| } |
| if token.(StartElement).Name.Local != "tag" { |
| t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) |
| } |
| attr := token.(StartElement).Attr[0] |
| if attr.Value != "azAZ09:-_" { |
| t.Errorf("Unexpected attribute value: %v", attr.Value) |
| } |
| if attr.Name.Local != "attr" { |
| t.Errorf("Unexpected attribute name: %v", attr.Name.Local) |
| } |
| } |
| |
| func TestValuelessAttrs(t *testing.T) { |
| tests := [][3]string{ |
| {"<p nowrap>", "p", "nowrap"}, |
| {"<p nowrap >", "p", "nowrap"}, |
| {"<input checked/>", "input", "checked"}, |
| {"<input checked />", "input", "checked"}, |
| } |
| for _, test := range tests { |
| d := NewDecoder(strings.NewReader(test[0])) |
| d.Strict = false |
| token, err := d.Token() |
| if _, ok := err.(*SyntaxError); ok { |
| t.Errorf("Unexpected error: %v", err) |
| } |
| if token.(StartElement).Name.Local != test[1] { |
| t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) |
| } |
| attr := token.(StartElement).Attr[0] |
| if attr.Value != test[2] { |
| t.Errorf("Unexpected attribute value: %v", attr.Value) |
| } |
| if attr.Name.Local != test[2] { |
| t.Errorf("Unexpected attribute name: %v", attr.Name.Local) |
| } |
| } |
| } |
| |
| func TestCopyTokenCharData(t *testing.T) { |
| data := []byte("same data") |
| var tok1 Token = CharData(data) |
| tok2 := CopyToken(tok1) |
| if !reflect.DeepEqual(tok1, tok2) { |
| t.Error("CopyToken(CharData) != CharData") |
| } |
| data[1] = 'o' |
| if reflect.DeepEqual(tok1, tok2) { |
| t.Error("CopyToken(CharData) uses same buffer.") |
| } |
| } |
| |
| func TestCopyTokenStartElement(t *testing.T) { |
| elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} |
| var tok1 Token = elt |
| tok2 := CopyToken(tok1) |
| if tok1.(StartElement).Attr[0].Value != "en" { |
| t.Error("CopyToken overwrote Attr[0]") |
| } |
| if !reflect.DeepEqual(tok1, tok2) { |
| t.Error("CopyToken(StartElement) != StartElement") |
| } |
| tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} |
| if reflect.DeepEqual(tok1, tok2) { |
| t.Error("CopyToken(StartElement) uses same buffer.") |
| } |
| } |
| |
| func TestCopyTokenComment(t *testing.T) { |
| data := []byte("<!-- some comment -->") |
| var tok1 Token = Comment(data) |
| tok2 := CopyToken(tok1) |
| if !reflect.DeepEqual(tok1, tok2) { |
| t.Error("CopyToken(Comment) != Comment") |
| } |
| data[1] = 'o' |
| if reflect.DeepEqual(tok1, tok2) { |
| t.Error("CopyToken(Comment) uses same buffer.") |
| } |
| } |
| |
| func TestSyntaxErrorLineNum(t *testing.T) { |
| testInput := "<P>Foo<P>\n\n<P>Bar</>\n" |
| d := NewDecoder(strings.NewReader(testInput)) |
| var err error |
| for _, err = d.Token(); err == nil; _, err = d.Token() { |
| } |
| synerr, ok := err.(*SyntaxError) |
| if !ok { |
| t.Fatalf("expected SyntaxError, got %v", err) |
| } |
| if synerr.Line != 3 { |
| t.Errorf("SyntaxError line = %d, want 3", synerr.Line) |
| } |
| } |
| |
| func TestTrailingRawToken(t *testing.T) { |
| input := `<FOO></FOO> ` |
| d := NewDecoder(strings.NewReader(input)) |
| var err error |
| for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { |
| } |
| if err != io.EOF { |
| t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) |
| } |
| } |
| |
| func TestTrailingToken(t *testing.T) { |
| input := `<FOO></FOO> ` |
| d := NewDecoder(strings.NewReader(input)) |
| var err error |
| for _, err = d.Token(); err == nil; _, err = d.Token() { |
| } |
| if err != io.EOF { |
| t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) |
| } |
| } |
| |
| func TestEntityInsideCDATA(t *testing.T) { |
| input := `<test><![CDATA[ &val=foo ]]></test>` |
| d := NewDecoder(strings.NewReader(input)) |
| var err error |
| for _, err = d.Token(); err == nil; _, err = d.Token() { |
| } |
| if err != io.EOF { |
| t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) |
| } |
| } |
| |
| var characterTests = []struct { |
| in string |
| err string |
| }{ |
| {"\x12<doc/>", "illegal character code U+0012"}, |
| {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, |
| {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, |
| {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, |
| {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, |
| {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"}, |
| {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, |
| {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"}, |
| {"<doc>&hello;</doc>", "invalid character entity &hello;"}, |
| } |
| |
| func TestDisallowedCharacters(t *testing.T) { |
| |
| for i, tt := range characterTests { |
| d := NewDecoder(strings.NewReader(tt.in)) |
| var err error |
| |
| for err == nil { |
| _, err = d.Token() |
| } |
| synerr, ok := err.(*SyntaxError) |
| if !ok { |
| t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) |
| } |
| if synerr.Msg != tt.err { |
| t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) |
| } |
| } |
| } |
| |
| func TestIsInCharacterRange(t *testing.T) { |
| invalid := []rune{ |
| utf8.MaxRune + 1, |
| 0xD800, // surrogate min |
| 0xDFFF, // surrogate max |
| -1, |
| } |
| for _, r := range invalid { |
| if isInCharacterRange(r) { |
| t.Errorf("rune %U considered valid", r) |
| } |
| } |
| } |
| |
| var procInstTests = []struct { |
| input string |
| expect [2]string |
| }{ |
| {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, |
| {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, |
| {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, |
| {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, |
| {`encoding="FOO" `, [2]string{"", "FOO"}}, |
| } |
| |
| func TestProcInstEncoding(t *testing.T) { |
| for _, test := range procInstTests { |
| if got := procInst("version", test.input); got != test.expect[0] { |
| t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) |
| } |
| if got := procInst("encoding", test.input); got != test.expect[1] { |
| t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) |
| } |
| } |
| } |
| |
| // Ensure that directives with comments include the complete |
| // text of any nested directives. |
| |
| var directivesWithCommentsInput = ` |
| <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> |
| <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]> |
| <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]> |
| ` |
| |
| var directivesWithCommentsTokens = []Token{ |
| CharData("\n"), |
| Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), |
| CharData("\n"), |
| Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`), |
| CharData("\n"), |
| Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`), |
| CharData("\n"), |
| } |
| |
| func TestDirectivesWithComments(t *testing.T) { |
| d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) |
| |
| for i, want := range directivesWithCommentsTokens { |
| have, err := d.Token() |
| if err != nil { |
| t.Fatalf("token %d: unexpected error: %s", i, err) |
| } |
| if !reflect.DeepEqual(have, want) { |
| t.Errorf("token %d = %#v want %#v", i, have, want) |
| } |
| } |
| } |
| |
| // Writer whose Write method always returns an error. |
| type errWriter struct{} |
| |
| func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } |
| |
| func TestEscapeTextIOErrors(t *testing.T) { |
| expectErr := "unwritable" |
| err := EscapeText(errWriter{}, []byte{'A'}) |
| |
| if err == nil || err.Error() != expectErr { |
| t.Errorf("have %v, want %v", err, expectErr) |
| } |
| } |
| |
| func TestEscapeTextInvalidChar(t *testing.T) { |
| input := []byte("A \x00 terminated string.") |
| expected := "A \uFFFD terminated string." |
| |
| buff := new(bytes.Buffer) |
| if err := EscapeText(buff, input); err != nil { |
| t.Fatalf("have %v, want nil", err) |
| } |
| text := buff.String() |
| |
| if text != expected { |
| t.Errorf("have %v, want %v", text, expected) |
| } |
| } |
| |
| func TestIssue5880(t *testing.T) { |
| type T []byte |
| data, err := Marshal(T{192, 168, 0, 1}) |
| if err != nil { |
| t.Errorf("Marshal error: %v", err) |
| } |
| if !utf8.Valid(data) { |
| t.Errorf("Marshal generated invalid UTF-8: %x", data) |
| } |
| } |
| |
| func TestIssue11405(t *testing.T) { |
| testCases := []string{ |
| "<root>", |
| "<root><foo>", |
| "<root><foo></foo>", |
| } |
| for _, tc := range testCases { |
| d := NewDecoder(strings.NewReader(tc)) |
| var err error |
| for { |
| _, err = d.Token() |
| if err != nil { |
| break |
| } |
| } |
| if _, ok := err.(*SyntaxError); !ok { |
| t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err) |
| } |
| } |
| } |
| |
| func TestIssue12417(t *testing.T) { |
| testCases := []struct { |
| s string |
| ok bool |
| }{ |
| {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true}, |
| {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true}, |
| {`<?xml encoding="utf-8" version="1.0"?><root/>`, true}, |
| {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false}, |
| } |
| for _, tc := range testCases { |
| d := NewDecoder(strings.NewReader(tc.s)) |
| var err error |
| for { |
| _, err = d.Token() |
| if err != nil { |
| if err == io.EOF { |
| err = nil |
| } |
| break |
| } |
| } |
| if err != nil && tc.ok { |
| t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err) |
| continue |
| } |
| if err == nil && !tc.ok { |
| t.Errorf("%q: Encoding charset: expected error, got nil", tc.s) |
| } |
| } |
| } |
| |
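| // tokenMap returns a TokenReader wrapper that applies mapping to every token |
| // read from the wrapped reader. |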
| func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader { |
| return func(src TokenReader) TokenReader { |
| return mapper{ |
| t: src, |
| f: mapping, |
| } |
| } |
| } |
| |
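| // mapper applies f to each token read from t. |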
| type mapper struct { |
| t TokenReader |
| f func(Token) Token |
| } |
| |
| func (m mapper) Token() (Token, error) { |
| tok, err := m.t.Token() |
| if err != nil { |
| return nil, err |
| } |
| return m.f(tok), nil |
| } |
| |
| func TestNewTokenDecoderIdempotent(t *testing.T) { |
| d := NewDecoder(strings.NewReader(`<br>`)) |
| d2 := NewTokenDecoder(d) |
| if d != d2 { |
| t.Error("NewTokenDecoder did not detect underlying Decoder") |
| } |
| } |
| |
| func TestWrapDecoder(t *testing.T) { |
| d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`)) |
| m := tokenMap(func(t Token) Token { |
| switch tok := t.(type) { |
| case StartElement: |
| if tok.Name.Local == "quote" { |
| tok.Name.Local = "blocking" |
| return tok |
| } |
| case EndElement: |
| if tok.Name.Local == "quote" { |
| tok.Name.Local = "blocking" |
| return tok |
| } |
| } |
| return t |
| }) |
| |
| d = NewTokenDecoder(m(d)) |
| |
| o := struct { |
| XMLName Name `xml:"blocking"` |
| Chardata string `xml:",chardata"` |
| }{} |
| |
| if err := d.Decode(&o); err != nil { |
| t.Fatal("Got unexpected error while decoding:", err) |
| } |
| |
| if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" { |
| t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata) |
| } |
| } |
| |
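| // tokReader is a TokenReader that returns an empty StartElement forever. |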
| type tokReader struct{} |
| |
| func (tokReader) Token() (Token, error) { |
| return StartElement{}, nil |
| } |
| |
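| // Failure is an Unmarshaler whose UnmarshalXML consumes nothing from the decoder. |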
| type Failure struct{} |
| |
| func (Failure) UnmarshalXML(*Decoder, StartElement) error { |
| return nil |
| } |
| |
| func TestTokenUnmarshaler(t *testing.T) { |
| defer func() { |
| if r := recover(); r != nil { |
| t.Error("Unexpected panic using custom token unmarshaler") |
| } |
| }() |
| |
| d := NewTokenDecoder(tokReader{}) |
| d.Decode(&Failure{}) |
| } |
| |
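| // testRoundTrip decodes input, re-encodes each token, then decodes the |
| // re-encoded output and verifies the two token streams are identical. |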
| func testRoundTrip(t *testing.T, input string) { |
| d := NewDecoder(strings.NewReader(input)) |
| var tokens []Token |
| var buf bytes.Buffer |
| e := NewEncoder(&buf) |
| for { |
| tok, err := d.Token() |
| if err == io.EOF { |
| break |
| } |
| if err != nil { |
| t.Fatalf("invalid input: %v", err) |
| } |
| if err := e.EncodeToken(tok); err != nil { |
| t.Fatalf("failed to re-encode input: %v", err) |
| } |
| tokens = append(tokens, CopyToken(tok)) |
| } |
| if err := e.Flush(); err != nil { |
| t.Fatal(err) |
| } |
| |
| d = NewDecoder(&buf) |
| for { |
| tok, err := d.Token() |
| if err == io.EOF { |
| break |
| } |
| if err != nil { |
| t.Fatalf("failed to decode output: %v", err) |
| } |
| if len(tokens) == 0 { |
| t.Fatalf("unexpected token: %#v", tok) |
| } |
| a, b := tokens[0], tok |
| if !reflect.DeepEqual(a, b) { |
| t.Fatalf("token mismatch: %#v vs %#v", a, b) |
| } |
| tokens = tokens[1:] |
| } |
| if len(tokens) > 0 { |
| t.Fatalf("lost tokens: %#v", tokens) |
| } |
| } |
| |
| func TestRoundTrip(t *testing.T) { |
| tests := map[string]string{ |
| "leading colon": `<::Test ::foo="bar"><:::Hello></:::Hello><Hello></Hello></::Test>`, |
| "trailing colon": `<foo abc:="x"></foo>`, |
| "double colon": `<x:y:foo></x:y:foo>`, |
| "comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`, |
| } |
| for name, input := range tests { |
| t.Run(name, func(t *testing.T) { testRoundTrip(t, input) }) |
| } |
| } |
| |
| func TestParseErrors(t *testing.T) { |
| withDefaultHeader := func(s string) string { |
| return `<?xml version="1.0" encoding="UTF-8"?>` + s |
| } |
| tests := []struct { |
| src string |
| err string |
| }{ |
| {withDefaultHeader(`</foo>`), `unexpected end element </foo>`}, |
| {withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`}, |
| {withDefaultHeader(`<? not ok ?>`), `expected target name after <?`}, |
| {withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`}, |
| {withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`}, |
| {withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`}, |
| {withDefaultHeader("\xf1"), `invalid UTF-8`}, |
| |
| // Header-related errors. |
| {`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`}, |
| |
| // Cases below are for "no errors". |
| {withDefaultHeader(`<?ok?>`), ``}, |
| {withDefaultHeader(`<?ok version="ok"?>`), ``}, |
| } |
| |
| for _, test := range tests { |
| d := NewDecoder(strings.NewReader(test.src)) |
| var err error |
| for { |
| _, err = d.Token() |
| if err != nil { |
| break |
| } |
| } |
| if test.err == "" { |
| if err != io.EOF { |
| t.Errorf("parse %s: have %q error, expected none", test.src, err) |
| } |
| continue |
| } |
| if err == nil || err == io.EOF { |
| t.Errorf("parse %s: have no error, expected a non-nil error", test.src) |
| continue |
| } |
| if !strings.Contains(err.Error(), test.err) { |
| t.Errorf("parse %s: can't find %q error substring\nerror: %q", test.src, test.err, err) |
| continue |
| } |
| } |
| } |
| |
| const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?> |
| <br> |
| <br/><br/> |
| <br><br> |
| <br></br> |
| <BR> |
| <BR/><BR/> |
| <Br></Br> |
| <BR><span id="test">abc</span><br/><br/>` |
| |
| func BenchmarkHTMLAutoClose(b *testing.B) { |
| b.RunParallel(func(p *testing.PB) { |
| for p.Next() { |
| d := NewDecoder(strings.NewReader(testInputHTMLAutoClose)) |
| d.Strict = false |
| d.AutoClose = HTMLAutoClose |
| d.Entity = HTMLEntity |
| for { |
| _, err := d.Token() |
| if err != nil { |
| if err == io.EOF { |
| break |
| } |
| b.Fatalf("unexpected error: %v", err) |
| } |
| } |
| } |
| }) |
| } |
| |
| func TestHTMLAutoClose(t *testing.T) { |
| wantTokens := []Token{ |
| ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, |
| CharData("\n"), |
| StartElement{Name{"", "br"}, []Attr{}}, |
| EndElement{Name{"", "br"}}, |
| CharData("\n"), |
| StartElement{Name{"", "br"}, []Attr{}}, |
| EndElement{Name{"", "br"}}, |
| StartElement{Name{"", "br"}, []Attr{}}, |
| EndElement{Name{"", "br"}}, |
| CharData("\n"), |
| StartElement{Name{"", "br"}, []Attr{}}, |
| EndElement{Name{"", "br"}}, |
| StartElement{Name{"", "br"}, []Attr{}}, |
| EndElement{Name{"", "br"}}, |
| CharData("\n"), |
| StartElement{Name{"", "br"}, []Attr{}}, |
| EndElement{Name{"", "br"}}, |
| CharData("\n"), |
| StartElement{Name{"", "BR"}, []Attr{}}, |
| EndElement{Name{"", "BR"}}, |
| CharData("\n"), |
| StartElement{Name{"", "BR"}, []Attr{}}, |
| EndElement{Name{"", "BR"}}, |
| StartElement{Name{"", "BR"}, []Attr{}}, |
| EndElement{Name{"", "BR"}}, |
| CharData("\n"), |
| StartElement{Name{"", "Br"}, []Attr{}}, |
| EndElement{Name{"", "Br"}}, |
| CharData("\n"), |
| StartElement{Name{"", "BR"}, []Attr{}}, |
| EndElement{Name{"", "BR"}}, |
| StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}}, |
| CharData("abc"), |
| EndElement{Name{"", "span"}}, |
| StartElement{Name{"", "br"}, []Attr{}}, |
| EndElement{Name{"", "br"}}, |
| StartElement{Name{"", "br"}, []Attr{}}, |
| EndElement{Name{"", "br"}}, |
| } |
| |
| d := NewDecoder(strings.NewReader(testInputHTMLAutoClose)) |
| d.Strict = false |
| d.AutoClose = HTMLAutoClose |
| d.Entity = HTMLEntity |
| var haveTokens []Token |
| for { |
| tok, err := d.Token() |
| if err != nil { |
| if err == io.EOF { |
| break |
| } |
| t.Fatalf("unexpected error: %v", err) |
| } |
| haveTokens = append(haveTokens, CopyToken(tok)) |
| } |
| if len(haveTokens) != len(wantTokens) { |
| t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens)) |
| } |
| for i, want := range wantTokens { |
| if i >= len(haveTokens) { |
| t.Errorf("token[%d] expected %#v, have no token", i, want) |
| } else { |
| have := haveTokens[i] |
| if !reflect.DeepEqual(have, want) { |
| t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want) |
| } |
| } |
| } |
| } |