Implement Split()
diff --git a/README b/README
index 87bb00d..4d34e87 100644
--- a/README
+++ b/README
@@ -6,6 +6,15 @@
Shellquote provides utilities for joining/splitting strings using sh's
word-splitting rules.
+VARIABLES
+
+var (
+ UnterminatedSingleQuoteError = errors.New("Unterminated single-quoted string")
+ UnterminatedDoubleQuoteError = errors.New("Unterminated double-quoted string")
+ UnterminatedEscapeError = errors.New("Unterminated backslash-escape")
+)
+
+
FUNCTIONS
func Join(args ...string) string
@@ -13,4 +22,15 @@
/bin/sh, the resulting string will be split back into the original
arguments.
+func Split(input string) (words []string, err error)
+ Split splits a string according to /bin/sh's word-splitting rules. It
+ supports backslash-escapes, single-quotes, and double-quotes. Notably it
+ does not support the $'' style of quoting. It also doesn't attempt to
+ perform any other sort of expansion, including brace expansion, shell
+ expansion, or pathname expansion.
+
+ If the given input has an unterminated quoted string or ends in a
+ backslash-escape, one of UnterminatedSingleQuoteError,
+ UnterminatedDoubleQuoteError, or UnterminatedEscapeError is returned.
+
diff --git a/both_test.go b/both_test.go
new file mode 100644
index 0000000..9cba3c8
--- /dev/null
+++ b/both_test.go
@@ -0,0 +1,30 @@
+package shellquote
+
+import (
+	"reflect"
+	"testing"
+	"testing/quick"
+)
+
+// This file is named both_test.go because it exercises Join and Split together.
+
+// TestJoinSplit is a round-trip property test: for every generated slice of
+// strings, splitting the output of Join must give back the original slice.
+func TestJoinSplit(t *testing.T) {
+	roundTrips := func(strs []string) bool {
+		joined := Join(strs...)
+		got, err := Split(joined)
+		switch {
+		case err != nil:
+			t.Logf("Error splitting %#v: %v", joined, err)
+			return false
+		case !reflect.DeepEqual(strs, got):
+			t.Logf("Input %q did not match output %q", strs, got)
+			return false
+		}
+		return true
+	}
+	if err := quick.Check(roundTrips, nil); err != nil {
+		t.Error(err)
+	}
+}
diff --git a/unquote.go b/unquote.go
new file mode 100644
index 0000000..ba3a0f2
--- /dev/null
+++ b/unquote.go
@@ -0,0 +1,183 @@
+package shellquote
+
+import (
+	"bytes"
+	"errors"
+	"strings"
+	"unicode/utf8"
+)
+
+// Errors reported by Split when the input ends inside a quoted string or
+// directly after a backslash.
+var (
+	UnterminatedSingleQuoteError = errors.New("Unterminated single-quoted string")
+	UnterminatedDoubleQuoteError = errors.New("Unterminated double-quoted string")
+	UnterminatedEscapeError      = errors.New("Unterminated backslash-escape")
+)
+
+// Characters that drive the word-splitting state machine.
+var (
+	splitChars        = " \n\t"
+	singleChar        = '\''
+	doubleChar        = '"'
+	escapeChar        = '\\'
+	doubleEscapeChars = "$`\"\n\\"
+)
+
+// Split splits a string according to /bin/sh's word-splitting rules. It
+// supports backslash-escapes, single-quotes, and double-quotes. Notably it does
+// not support the $'' style of quoting. It also doesn't attempt to perform any
+// other sort of expansion, including brace expansion, shell expansion, or
+// pathname expansion.
+//
+// A backslash immediately followed by a newline is a line continuation: both
+// characters are dropped and never produce a word of their own.
+//
+// If the given input has an unterminated quoted string or ends in a
+// backslash-escape, one of UnterminatedSingleQuoteError,
+// UnterminatedDoubleQuoteError, or UnterminatedEscapeError is returned.
+func Split(input string) (words []string, err error) {
+	var buf bytes.Buffer
+	// Start from a non-nil slice so that empty input yields an empty, non-nil
+	// result.
+	words = make([]string, 0)
+
+	for len(input) > 0 {
+		// Skip any splitChars at the start.
+		c, l := utf8.DecodeRuneInString(input)
+		if strings.ContainsRune(splitChars, c) {
+			input = input[l:]
+			continue
+		}
+		if c == escapeChar {
+			// Look ahead for an escaped newline between words. It has to be
+			// consumed here: handing it to splitWord would yield a spurious
+			// empty word whenever a separator (or the end of input) follows.
+			next := input[l:]
+			if len(next) == 0 {
+				return words, UnterminatedEscapeError
+			}
+			if c2, l2 := utf8.DecodeRuneInString(next); c2 == '\n' {
+				input = next[l2:]
+				continue
+			}
+		}
+
+		var word string
+		word, input, err = splitWord(input, &buf)
+		if err != nil {
+			return words, err
+		}
+		words = append(words, word)
+	}
+	return words, nil
+}
+
+// splitWord consumes one word from the front of input. It returns the
+// unquoted word together with the text that follows it; on failure it returns
+// one of the Unterminated*Error values and empty strings. buf is reset on entry
+// and used as scratch space for assembling the word.
+//
+// The body is a goto-driven state machine with one labeled block per quoting
+// context. In every state, input begins at the first byte that has not yet
+// been copied into buf or deliberately dropped.
+func splitWord(input string, buf *bytes.Buffer) (word string, remainder string, err error) {
+	buf.Reset()
+
+raw:
+	{
+		// Unquoted text. cur runs ahead of input; everything between the
+		// start of input and the current rune is still waiting to be copied.
+		cur := input
+		for len(cur) > 0 {
+			c, l := utf8.DecodeRuneInString(cur)
+			cur = cur[l:]
+			if c == singleChar {
+				buf.WriteString(input[0 : len(input)-len(cur)-l])
+				input = cur
+				goto single
+			} else if c == doubleChar {
+				buf.WriteString(input[0 : len(input)-len(cur)-l])
+				input = cur
+				goto double
+			} else if c == escapeChar {
+				buf.WriteString(input[0 : len(input)-len(cur)-l])
+				input = cur
+				goto escape
+			} else if strings.ContainsRune(splitChars, c) {
+				// The separator ends the word and is itself consumed.
+				buf.WriteString(input[0 : len(input)-len(cur)-l])
+				return buf.String(), cur, nil
+			}
+		}
+		// End of input: whatever is still pending belongs to the word.
+		if len(input) > 0 {
+			buf.WriteString(input)
+			input = ""
+		}
+		goto done
+	}
+
+escape:
+	{
+		// An unquoted backslash: the rune that follows is taken literally.
+		if len(input) == 0 {
+			return "", "", UnterminatedEscapeError
+		}
+		c, l := utf8.DecodeRuneInString(input)
+		if c == '\n' {
+			// a backslash-escaped newline is elided from the output entirely
+		} else {
+			buf.WriteString(input[:l])
+		}
+		input = input[l:]
+	}
+	goto raw
+
+single:
+	{
+		// Single quotes: everything up to the closing quote is literal.
+		i := strings.IndexRune(input, singleChar)
+		if i == -1 {
+			return "", "", UnterminatedSingleQuoteError
+		}
+		buf.WriteString(input[0:i])
+		input = input[i+1:]
+		goto raw
+	}
+
+double:
+	{
+		// Double quotes: literal text, except that a backslash escapes the
+		// characters listed in doubleEscapeChars.
+		cur := input
+		for len(cur) > 0 {
+			c, l := utf8.DecodeRuneInString(cur)
+			cur = cur[l:]
+			if c == doubleChar {
+				buf.WriteString(input[0 : len(input)-len(cur)-l])
+				input = cur
+				goto raw
+			} else if c == escapeChar {
+				// bash only supports certain escapes in double-quoted strings
+				c2, l2 := utf8.DecodeRuneInString(cur)
+				cur = cur[l2:]
+				if strings.ContainsRune(doubleEscapeChars, c2) {
+					buf.WriteString(input[0 : len(input)-len(cur)-l-l2])
+					if c2 == '\n' {
+						// newline is special, skip the backslash entirely
+					} else {
+						buf.WriteRune(c2)
+					}
+					input = cur
+				}
+				// Any other backslash pair stays pending in input and is
+				// copied verbatim later, backslash included.
+			}
+		}
+		return "", "", UnterminatedDoubleQuoteError
+	}
+
+done:
+	return buf.String(), input, nil
+}
diff --git a/unquote_test.go b/unquote_test.go
new file mode 100644
index 0000000..32ea514
--- /dev/null
+++ b/unquote_test.go
@@ -0,0 +1,59 @@
+package shellquote
+
+import (
+	"reflect"
+	"testing"
+)
+
+// TestSimpleSplit checks that every input in simpleSplitTest splits without
+// error into exactly the expected words.
+func TestSimpleSplit(t *testing.T) {
+	for _, tc := range simpleSplitTest {
+		got, err := Split(tc.input)
+		if err != nil {
+			t.Errorf("Input %q, got error %#v", tc.input, err)
+			continue
+		}
+		if !reflect.DeepEqual(got, tc.output) {
+			t.Errorf("Input %q, got %q, expected %q", tc.input, got, tc.output)
+		}
+	}
+}
+
+// TestErrorSplit checks that every malformed input in errorSplitTest is
+// rejected with the expected error value.
+func TestErrorSplit(t *testing.T) {
+	for _, tc := range errorSplitTest {
+		if _, err := Split(tc.input); err != tc.error {
+			t.Errorf("Input %q, got error %#v, expected error %#v", tc.input, err, tc.error)
+		}
+	}
+}
+
+// simpleSplitTest lists well-formed inputs and the words Split should produce.
+var simpleSplitTest = []struct {
+	input  string
+	output []string
+}{
+	{"hello", []string{"hello"}},
+	{"hello goodbye", []string{"hello", "goodbye"}},
+	{"hello goodbye", []string{"hello", "goodbye"}},
+	{"glob* test?", []string{"glob*", "test?"}},
+	{"don\\'t you know the dewey decimal system\\?", []string{"don't", "you", "know", "the", "dewey", "decimal", "system?"}},
+	{"'don'\\''t you know the dewey decimal system?'", []string{"don't you know the dewey decimal system?"}},
+	{"one '' two", []string{"one", "", "two"}},
+	{"text with\\\na newline", []string{"text", "witha", "newline"}},
+	{"\"quoted\\d\\\\\\\" text with a\\\nnewline\"", []string{"quoted\\d\\\" text with anewline"}},
+	{"foo\"bar\"baz", []string{"foobarbaz"}},
+}
+
+// errorSplitTest lists malformed inputs and the error Split should return.
+var errorSplitTest = []struct {
+	input string
+	error error
+}{
+	{"don't worry", UnterminatedSingleQuoteError},
+	{"'test'\\''ing", UnterminatedSingleQuoteError},
+	{"\"foo'bar", UnterminatedDoubleQuoteError},
+	{"foo\\", UnterminatedEscapeError},
+}