Reputation: 8391
This might be a duplicate. I couldn't find the answer in Swift, so I am not sure.
`componentsSeparatedByCharactersInSet` removes the delimiter. If you separate by only one possible character, it is easy to add it back. But what about when you separate by a set of characters?
Is there another method to split?
Upvotes: 3
Views: 2147
Reputation: 453
I came here looking for an answer to this question. I didn't find what I was looking for and ended up building this with repeated calls to .split(...). It isn't elegant, but you can choose which delimiters are preserved and which aren't. There's probably a way to avoid the String <--> Substring conversions; does anyone know?
// Sample text to tokenize.
let input = """
{All those moments will be (lost in time)},
like tears [in rain](. ([(Time to)] die))
"""
// Delimiter that ended the most recent split. It is written by the
// `whereSeparator` closure below; a space means "omitted delimiter, emit
// nothing". The initial "!" is only a placeholder: the value is always
// overwritten before it is read, because it is read only after a split happened.
var separator: Character = "!"
// Tokens in order of appearance: words plus the preserved delimiters.
var output: [String] = []
// Slide a `Substring` over the text instead of rebuilding a `String` from the
// remainder on every pass; the remainder is never copied, so the whole scan
// stays linear in the length of the input.
var remainder = Substring(input)
while true {
    // Split off everything up to the first delimiter. With `maxSplits: 1` and
    // empty pieces retained, `tokens` is either [head, rest] or just [head].
    let tokens = remainder.split(
        maxSplits: 1,
        omittingEmptySubsequences: false,
        whereSeparator: {
            switch $0 {
            case "{", "}", "(", ")", "[", "]": // preserve
                separator = $0
                return true
            case " ", "\n", ",", ".": // omit
                separator = " "
                return true
            default:
                return false
            }
        }
    )
    // An empty head means two delimiters were adjacent; there is no word to emit.
    if !tokens[0].isEmpty {
        output.append(String(tokens[0]))
    }
    // A single piece means no delimiter was left: the input is exhausted.
    guard tokens.count == 2 else { break }
    if separator != " " {
        output.append(String(separator))
    }
    remainder = tokens[1]
}
for token in output { print(token) }
In the case above, the delimiters are not stored in actual sets. I didn't need that, but if you do, simply make these declarations:
/// Delimiters that split the input and are kept as tokens of their own.
let preservedDelimiters = Set<Character>("{}()[]")
/// Delimiters that split the input and are dropped from the output.
let omittedDelimiters = Set<Character>(" \n,.")
and replace the whereSeparator function with:
whereSeparator: {
// Split on any character found in either delimiter set. `separator` records
// what matched: the character itself for a preserved delimiter, or a space
// for an omitted one. Any other character is not a split point.
if preservedDelimiters.contains($0) {
separator = $0
return true
} else if omittedDelimiters.contains($0) {
separator = " "
return true
} else {
return false
}
}
Upvotes: 0
Reputation: 121
extension Collection {
    /// Splits the collection into consecutive slices, ending a slice right
    /// after every element for which `isSplit` returns `true`.
    ///
    /// The matching element is kept as the last element of its slice, so
    /// concatenating the returned slices reproduces the original collection.
    ///
    /// - Parameter isSplit: Predicate evaluated once per element, in order.
    /// - Returns: The slices in order. Elements after the last match form a
    ///   final slice; no empty slice is appended when the last element
    ///   matches, and an empty collection yields an empty array.
    /// - Throws: Rethrows any error thrown by `isSplit`.
    func splitAt(isSplit: (Element) throws -> Bool) rethrows -> [SubSequence] {
        var sliceStart = startIndex
        var result: [SubSequence] = []
        // A plain loop replaces the optional-returning `flatMap` (deprecated
        // since Swift 4.1) and keeps the index bookkeeping out of a transform
        // closure.
        for i in indices where try isSplit(self[i]) {
            let next = index(after: i)
            result.append(self[sliceStart..<next])
            sliceStart = next
        }
        // Whatever follows the last delimiter becomes the final slice.
        if sliceStart != endIndex {
            result.append(suffix(from: sliceStart))
        }
        return result
    }
}
Thanks to Oisdk for getting me thinking.
Upvotes: 4
Reputation: 10091
This method works on `CollectionType`s, rather than `String`s, but it should be easy enough to adapt:
extension CollectionType {
/// Splits the collection immediately before every element for which `isSplit`
/// returns true, so each matching element starts the following slice.
///
/// - Parameter isSplit: Predicate evaluated on each element.
/// - Returns: The slices in order. The last entry is always the suffix that
///   starts at the last matching element (or the whole collection when
///   nothing matched). A match on the very first element produces an empty
///   leading slice.
/// - Throws: Rethrows any error thrown by `isSplit`.
func splitAt(@noescape isSplit: Generator.Element throws -> Bool) rethrows -> [SubSequence] {
// Start of the slice currently being built.
var p = startIndex
return try indices
.filter { i in try isSplit(self[i]) }
.map { i in
// `defer` advances `p` to this split index only after the slice below
// has been formed from the previous value of `p`.
defer { p = i }
return self[p..<i]
// By the time `suffixFrom(p)` is evaluated, `p` holds the last split index.
} + [suffixFrom(p)]
}
}
extension CollectionType where Generator.Element : Equatable {
/// Convenience overload: splits at every element equal to `splitter` by
/// forwarding an equality predicate to the closure-based `splitAt`.
///
/// - Parameter splitter: The element value to split at.
/// - Returns: Whatever the closure-based `splitAt` returns for that predicate.
func splitAt(splitter: Generator.Element) -> [SubSequence] {
return splitAt { el in el == splitter }
}
}
You could use it like this:
// Example input.
let sentence = "Hello, my name is oisdk. This should split: but only at punctuation!"
// The set of punctuation characters to split at.
let puncSet = Set("!.,:".characters)
// Split the sentence's characters wherever one is in `puncSet`, then turn
// each resulting slice back into a String.
sentence
.characters
.splitAt(puncSet.contains)
.map(String.init)
// ["Hello", ", my name is oisdk", ". This should split", ": but only at punctuation", "!"]
Or, this version, which uses a for-loop, and splits after the delimiter:
extension CollectionType {
/// Splits the collection immediately after every element for which `isSplit`
/// returns true, so each matching element ends its slice.
///
/// - Parameter isSplit: Predicate evaluated on each element.
/// - Returns: The slices in order. Elements after the last match form a final
///   slice; nothing is appended when the last element matches.
/// - Throws: Rethrows any error thrown by `isSplit`.
func splitAt(@noescape isSplit: Generator.Element throws -> Bool) rethrows -> [SubSequence] {
// Start of the slice currently being built.
var p = startIndex
var result: [SubSequence] = []
for i in indices where try isSplit(self[i]) {
// Closed range: the matching element at `i` is included in the slice.
result.append(self[p...i])
p = i.successor()
}
// Append the remainder only if something follows the last match.
if p != endIndex { result.append(suffixFrom(p)) }
return result
}
}
extension CollectionType where Generator.Element : Equatable {
/// Convenience overload: splits at every element equal to `splitter` by
/// forwarding an equality predicate to the closure-based `splitAt`.
///
/// - Parameter splitter: The element value to split at.
/// - Returns: Whatever the closure-based `splitAt` returns for that predicate.
func splitAt(splitter: Generator.Element) -> [SubSequence] {
return splitAt { el in el == splitter }
}
}
// Example input.
let sentence = "Hello, my name is oisdk. This should split: but only at punctuation!"
// The set of punctuation characters to split at.
let puncSet = Set("!.,:".characters)
// Split the sentence's characters wherever one is in `puncSet`, then turn
// each resulting slice back into a String.
sentence
.characters
.splitAt(puncSet.contains)
.map(String.init)
// ["Hello,", " my name is oisdk.", " This should split:", " but only at punctuation!"]
Or, if you wanted to get the most Swift features into one function (`defer`, `throws`, a Protocol extension, an evil `flatMap`, `guard`, and Optionals):
extension CollectionType {
/// Splits the collection immediately after every element for which `isSplit`
/// returns true, so each matching element ends its slice.
///
/// - Parameter isSplit: Predicate evaluated on each element.
/// - Returns: The slices in order. Elements after the last match form a final
///   slice; nothing is appended when the last element matches.
/// - Throws: Rethrows any error thrown by `isSplit`.
func splitAt(@noescape isSplit: Generator.Element throws -> Bool) rethrows -> [SubSequence] {
// Start of the slice currently being built.
var p = startIndex
var result: [SubSequence] = try indices.flatMap { i in
// Non-matching indices yield nil, which `flatMap` drops from the result.
guard try isSplit(self[i]) else { return nil }
// `defer` moves `p` past the delimiter only after the slice below has
// been formed from the previous value of `p`.
defer { p = i.successor() }
return self[p...i]
}
// Append the remainder only if something follows the last match.
if p != endIndex { result.append(suffixFrom(p)) }
return result
}
}
Upvotes: 3