| // Copyright 2017 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| package tokenizer |
| |
| import ( |
| "reflect" |
| "testing" |
| ) |
| |
| func TestTokenizer_Tokenize(t *testing.T) { |
| tests := []struct { |
| text string |
| want Tokens |
| }{ |
| { |
| text: "Tokenize", |
| want: Tokens{&token{Text: "Tokenize", Offset: 0}}, |
| }, |
| { |
| text: "Hello world", |
| want: Tokens{ |
| &token{Text: "Hello", Offset: 0}, |
| &token{Text: "world", Offset: 6}, |
| }, |
| }, |
| { |
| text: `Goodnight, |
| Irene |
| `, |
| want: Tokens{ |
| &token{Text: "Goodnight", Offset: 0}, |
| &token{Text: ",", Offset: 9}, |
| &token{Text: "Irene", Offset: 11}, |
| }, |
| }, |
| { |
| text: "Copyright © 2017 Yoyodyne, Inc.", |
| want: Tokens{ |
| &token{Text: "Copyright", Offset: 0}, |
| &token{Text: "©", Offset: 10}, |
| &token{Text: "2017", Offset: 13}, |
| &token{Text: "Yoyodyne", Offset: 18}, |
| &token{Text: ",", Offset: 26}, |
| &token{Text: "Inc", Offset: 28}, |
| &token{Text: ".", Offset: 31}, |
| }, |
| }, |
| } |
| |
| for _, tt := range tests { |
| if got := Tokenize(tt.text); !reflect.DeepEqual(got, tt.want) { |
| t.Errorf("Tokenize(%q) = %+v, want %+v", tt.text, got, tt.want) |
| } |
| } |
| } |
| |
| func TestTokenizer_GenerateHashes(t *testing.T) { |
| tests := []struct { |
| text string |
| sizeFactor int |
| wantHash []uint32 |
| wantRanges TokenRanges |
| }{ |
| { |
| text: "", |
| sizeFactor: 1, |
| wantHash: nil, |
| wantRanges: nil, |
| }, |
| { |
| text: "Hashes", |
| sizeFactor: 1, |
| wantHash: []uint32{408116689}, |
| wantRanges: TokenRanges{{Start: 0, End: 1}}, |
| }, |
| { |
| text: "hello world", |
| sizeFactor: 1, |
| wantHash: []uint32{222957957}, |
| wantRanges: TokenRanges{{Start: 0, End: 2}}, |
| }, |
| { |
| text: "Copyright © 2017 Yoyodyne, Inc.", |
| sizeFactor: 3, |
| wantHash: []uint32{2473816729, 966085113, 3025678301, 3199087486, 850352802, 1274745089}, |
| wantRanges: TokenRanges{ |
| {Start: 0, End: 2}, |
| {Start: 1, End: 3}, |
| {Start: 2, End: 4}, |
| {Start: 3, End: 5}, |
| {Start: 4, End: 6}, |
| {Start: 5, End: 7}, |
| }, |
| }, |
| } |
| |
| for _, tt := range tests { |
| hash := make(Hash) |
| toks := Tokenize(tt.text) |
| h, tr := toks.GenerateHashes(hash, len(toks)/tt.sizeFactor) |
| if !reflect.DeepEqual(h, tt.wantHash) { |
| t.Errorf("GenerateHashes(hash) = %v, want %v", h, tt.wantHash) |
| } |
| if !reflect.DeepEqual(tr, tt.wantRanges) { |
| t.Errorf("GenerateHashes(ranges) = %v, want %v", tr, tt.wantRanges) |
| } |
| } |
| } |