MOBlox
MOBlox

Reputation: 301

Splitting a string at Space, except inside quotation marks

I was wondering if there is any way I could easily split a string at spaces, except when the space is inside quotation marks?

For example, changing

Foo bar random "letters lol" stuff

into

Foo, bar, random, "letters lol", stuff

Upvotes: 17

Views: 8407

Answers (4)

ardnew
ardnew

Reputation: 2086

Generalizing the question a little bit, this solution allows for several different quote delimiters (', ", etc.) and several different field delimiters (space, comma, etc.).

Test it online

package main

import (
    "fmt"
    "strings"
)

// Split breaks s into fields separated by runs of one or more runes from
// delim, except that delimiter runes appearing between a pair of matching
// quote runes do not separate fields.
//
// A quoted section is opened by any rune in quote and is closed only by the
// same rune; other quote runes inside it are treated as ordinary text. The
// quote runes themselves are kept in the returned fields. Empty fields are
// dropped, and an unterminated quote extends to the end of s. The result is
// always a non-nil slice.
//
// The scan is an explicit in-order loop rather than a stateful callback passed
// to strings.FieldsFunc, because FieldsFunc does not guarantee the order in
// which it invokes its callback.
func Split(s string, quote []rune, delim []rune) []string {
    const none = '\000'

    // Convert once up front instead of once per rune of s.
    quotes, delims := string(quote), string(delim)

    fields := []string{}
    open := none // quote rune currently open, or none
    start := -1  // byte offset where the current field began, or -1
    for i, r := range s {
        separator := false
        switch {
        case open == none:
            if strings.ContainsRune(quotes, r) {
                open = r
            } else {
                separator = strings.ContainsRune(delims, r)
            }
        case open == r:
            open = none
        }

        if separator {
            if start >= 0 {
                fields = append(fields, s[start:i])
                start = -1
            }
            continue
        }
        if start < 0 {
            start = i
        }
    }
    if start >= 0 {
        fields = append(fields, s[start:])
    }
    return fields
}

// ExampleSplit splits a sample string that mixes single- and double-quoted
// sections, using space and comma as field delimiters, and prints the
// resulting fields joined by "|".
func ExampleSplit() {
    const input = `Foo 'bar, "ran"dom' "le'tt'er's lol"stuff,xyz`
    quotes := []rune(`'"`)
    delims := []rune(" ,")
    fields := Split(input, quotes, delims)
    fmt.Println(strings.Join(fields, "|"))
    // Output: Foo|'bar, "ran"dom'|"le'tt'er's lol"stuff|xyz
}

Limitations

  1. It does not remove the syntactical quote symbols from its output (it only removes the field delimiters).
  2. It does not support arbitrary-length strings as any single quote delimiter or field delimiter; they must be runes.

Both limitations would be trivial to fix by replacing the strings.ContainsRune and strings.FieldsFunc calls with simple loops.

Upvotes: 0

wasmup
wasmup

Reputation: 16223

  1. Using strings.FieldsFunc try this:
package main

import (
    "fmt"
    "strings"
)

// main splits a sample string on spaces that lie outside double quotes and
// prints the fields joined by ", ".
func main() {
    const input = `Foo bar random "letters lol" stuff`

    // inQuotes flips on every double quote seen so far.
    // NOTE: this depends on strings.FieldsFunc calling the predicate once per
    // rune, in order; its documentation does not promise any call order.
    inQuotes := false
    isSeparator := func(c rune) bool {
        if c == '"' {
            inQuotes = !inQuotes
        }
        return c == ' ' && !inQuotes
    }

    fields := strings.FieldsFunc(input, isSeparator)
    fmt.Println(strings.Join(fields, ", ")) // Foo, bar, random, "letters lol", stuff
}

  1. Using a simple strings.Builder and a range loop over the string, keeping or dropping the " characters as you prefer, try this:
package main

import (
    "fmt"
    "strings"
)

// main walks a sample string rune by rune, accumulating the current field in
// a strings.Builder and cutting a new field at every space that is not inside
// double quotes. It prints the fields joined by ", ".
func main() {
    const input = `Foo bar random "letters lol" stuff`

    var (
        fields  []string
        current strings.Builder
        inQuote bool
    )
    for _, c := range input {
        switch {
        case c == '"':
            inQuote = !inQuote
            current.WriteRune(c) // keep '"' otherwise comment this line
        case c == ' ' && !inQuote:
            fields = append(fields, current.String())
            current.Reset()
        default:
            current.WriteRune(c)
        }
    }
    // Flush the last field, which has no trailing space to terminate it.
    if current.Len() > 0 {
        fields = append(fields, current.String())
    }

    fmt.Println(strings.Join(fields, ", ")) // Foo, bar, random, "letters lol", stuff
    // not keep '"': // Foo, bar, random, letters lol, stuff
}


  1. Using scanner.Scanner, try this:
package main

import (
    "fmt"
    "strings"
    "text/scanner"
)

// main tokenizes a sample string with text/scanner, collects the text of
// every token up to EOF, and prints the tokens joined by ", ".
func main() {
    var sc scanner.Scanner
    sc.Init(strings.NewReader(`Foo bar random "letters lol" stuff`))

    tokens := make([]string, 0, 5)
    for tok := sc.Scan(); tok != scanner.EOF; tok = sc.Scan() {
        tokens = append(tokens, sc.TokenText())
    }

    fmt.Println(strings.Join(tokens, ", ")) // Foo, bar, random, "letters lol", stuff
}

  1. Using csv.NewReader which removes " itself, try this:
package main

import (
    "encoding/csv"
    "fmt"
    "log"
    "strings"
)

// main reads a sample string as a single CSV record whose field separator is
// a space, then prints the fields joined by ", ". It exits through log.Fatal
// if the record cannot be read.
func main() {
    const input = `Foo bar random "letters lol" stuff`

    reader := csv.NewReader(strings.NewReader(input))
    reader.Comma = ' ' // split on spaces instead of commas

    fields, err := reader.Read()
    if err != nil {
        log.Fatal(err)
    }

    fmt.Println(strings.Join(fields, ", ")) // Foo, bar, random, letters lol, stuff
}

  1. Using regexp, try this:
package main

import (
    "fmt"
    "regexp"
    "strings"
)

// main extracts the tokens of a sample string with a regular expression that
// matches either a run of characters that are neither whitespace nor a double
// quote, or a complete double-quoted section, and prints them joined by ", ".
func main() {
    const input = `Foo bar random "letters lol" stuff`

    tokenPattern := regexp.MustCompile(`[^\s"]+|"([^"]*)"`)
    tokens := tokenPattern.FindAllString(input, -1)

    fmt.Println(strings.Join(tokens, ", ")) // Foo, bar, random, "letters lol", stuff
}

Upvotes: 14

Blue Bot
Blue Bot

Reputation: 2438

You could use regex

This (go playground) will cover all use cases for multiple words inside quotes and multiple quoted entries in your array:

package main

import (
    "fmt"
    "regexp"
)

// main extracts the tokens of a sample string with a regular expression and
// prints the resulting slice.
func main() {
    s := `Foo bar random "letters lol" stuff "also will" work on "multiple quoted stuff"`

    // Three alternatives: a bare word (no whitespace or quote characters), a
    // double-quoted section, or a single-quoted section. The single-quoted
    // alternative needs its closing quote; without it, an input such as
    // 'a b' c is cut into the tokens "'a b" and "' c".
    r := regexp.MustCompile(`[^\s"']+|"([^"]*)"|'([^']*)'`)
    arr := r.FindAllString(s, -1)
    fmt.Println("your array: ", arr)
}

Output will be:

your array:  [Foo bar random "letters lol" stuff "also will" work on "multiple quoted stuff"]

If you want to learn more about regex here is a great SO answer with super handy resources at the end - Learning Regular Expressions

Hope this helps

Upvotes: 2

peterSO
peterSO

Reputation: 166569

Think about it. You have a string in comma separated values (CSV) file format, RFC4180, except that your separator, outside quote pairs, is a space (instead of a comma). For example,

package main

import (
    "encoding/csv"
    "fmt"
    "strings"
)

// main prints a sample string, parses it as a single CSV record whose field
// separator is a space, and prints each resulting field quoted on its own
// line. If the record cannot be read it prints the error and returns.
func main() {
    const input = `Foo bar random "letters lol" stuff`
    fmt.Printf("String:\n%q\n", input)

    // Split string
    reader := csv.NewReader(strings.NewReader(input))
    reader.Comma = ' ' // space
    record, err := reader.Read()
    if err != nil {
        fmt.Println(err)
        return
    }

    fmt.Print("\nFields:\n")
    for i := range record {
        fmt.Printf("%q\n", record[i])
    }
}

Playground: https://play.golang.org/p/Ed4IV97L7H

Output:

String:
"Foo bar random \"letters lol\" stuff"

Fields:
"Foo"
"bar"
"random"
"letters lol"
"stuff"

Upvotes: 18

Related Questions