Reputation: 189
I want to split a simple String at its capital letters into an array. It should look something like this:
let teststring = "NaCuHHe"
and the result should be:
["Na", "Cu", "H", "He"]
I tried the following:
/// Returns, in order, the characters of `s` that fall inside the closed range `"A"..."Z"`.
///
/// Only the capital letters themselves are returned; the characters that follow them are dropped.
///
/// - Parameter s: The string to scan.
/// - Returns: The matching characters, or an empty array when there are none.
func capitalLetters(s: String) -> [Character] {
    let capitals: ClosedRange<Character> = "A"..."Z"
    // Swift 4+: a String is itself a collection of Character, so it is filtered directly
    // (the separate `characters` view is no longer available in Swift 5).
    return s.filter { capitals.contains($0) }
}
I searched through the documentation and other websites but I did not find anything helpful. I'm at my wit's end. I don't know what else to try, since I'm really new to Swift. It still gives me only the capital letters, and I don't know how to change it so that it also gives me the characters that follow each capital letter.
Upvotes: 14
Views: 4832
Reputation: 320
The simplest would be to use chunked(by:)
from Swift Algorithms:
import Algorithms
// The closure is given two adjacent characters and returns false when the second one is uppercase.
// NOTE(review): this relies on chunked(by:) from swift-algorithms starting a new chunk wherever
// the closure returns false — confirm against the package documentation.
let chunks = "HereIAmAgain".chunked { !$1.isUppercase }
Alternatively, you can search for indices of uppercased characters and then zip()
adjacent indices into an array of substrings:
extension StringProtocol {
    /// Cuts the string in front of every uppercase character except the first character.
    ///
    /// - Returns: The consecutive pieces, in order. A string without any cut point
    ///   (including the empty string) yields a single piece covering the whole string.
    func splitUppercased() -> [SubSequence] {
        var pieces: [SubSequence] = []
        var pieceStart = startIndex
        // The first position is skipped so a leading capital does not produce an empty piece.
        for position in indices.dropFirst() where self[position].isUppercase {
            pieces.append(self[pieceStart..<position])
            pieceStart = position
        }
        // Whatever remains after the last cut point is the final piece.
        pieces.append(self[pieceStart..<endIndex])
        return pieces
    }
}
Upvotes: 0
Reputation: 236380
A little bit late for the party but I think it is still worth posting it.
A different approach using unfolded sequences:
extension Collection {
    /// Eagerly collects the chunks produced by `unfoldedSequences(upTo:)` into an array.
    ///
    /// - Parameter predicate: Returns `true` for an element that should begin a new chunk.
    /// - Returns: The chunks in order; empty when the collection is empty.
    func subSequences(upTo predicate: @escaping (Element) -> Bool) -> [SubSequence] {
        Array(unfoldedSequences(upTo: predicate))
    }

    /// Produces the chunks of the collection one at a time.
    ///
    /// Each chunk begins where the previous one ended and runs up to (not including) the next
    /// element, after the chunk's first element, for which `predicate` returns `true`.
    ///
    /// - Parameter predicate: Returns `true` for an element that should begin a new chunk.
    /// - Returns: A sequence of consecutive subsequences that together cover the collection.
    func unfoldedSequences(upTo predicate: @escaping (Element) -> Bool) -> UnfoldSequence<SubSequence, Index> {
        sequence(state: startIndex) { cursor in
            if cursor >= endIndex { return nil }
            let chunkStart = cursor
            // The search begins after the chunk's first element, so that element never ends its own chunk.
            let remainder = self[index(after: chunkStart)...]
            cursor = remainder.firstIndex(where: predicate) ?? endIndex
            return self[chunkStart..<cursor]
        }
    }
}
Playground testing
let string = "NaCuHHe"
// Print every chunk on its own line as the sequence produces it.
string.unfoldedSequences(upTo: \.isUppercase).forEach { print($0) }
// Collect all chunks into an array and print it in one go.
let subSequences = string.subSequences(upTo: \.isUppercase)
print(subSequences)
This will print:
Na
Cu
H
He
["Na", "Cu", "H", "He"]
Upvotes: 0
Reputation: 9858
// Put a space in front of every uppercase character, glue the pieces together, then split on spaces.
// NOTE(review): spaces already present in `str` would also act as split points.
str.map { $0.isUppercase ? " " + String($0) : String($0) }.joined().split(separator: " ")
Upvotes: 0
Reputation: 291
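Inspired by Apple's implementation of the split function in Collection.
What I did is change how the start index of each subsequence is chosen.
In the original implementation, when you hit a separator:
the index is moved past the separator and the next subsequence starts there, so the separator is dropped.
In my implementation (when includeSeparator is true):
the next subsequence starts at the separator itself, and only then is the index moved past it, so the separator is kept.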
extension Collection {
    /// Splits the collection at every element for which `isSeparator` returns `true`,
    /// optionally keeping each separator as the first element of the subsequence that follows it.
    ///
    /// - Parameters:
    ///   - maxSplits: The split loop stops once this many subsequences have been collected; the
    ///     rest of the collection is then returned as one final subsequence. `0` means no splitting.
    ///   - omittingEmptySubsequences: When `true`, empty subsequences found at a separator are skipped.
    ///   - includeSeparator: When `true`, a separator starts the next subsequence instead of being dropped.
    ///   - isSeparator: Returns `true` for an element at which the collection should be split.
    /// - Returns: The subsequences, in order.
    /// - Throws: Rethrows any error thrown by `isSeparator`.
    @inlinable
    public __consuming func split(
        maxSplits: Int = Int.max,
        omittingEmptySubsequences: Bool = true,
        includeSeparator: Bool = false,
        whereSeparator isSeparator: (Element) throws -> Bool
    ) rethrows -> [SubSequence] {
        var result: [SubSequence] = []
        var subSequenceStart: Index = startIndex

        // Appends `subSequenceStart..<end`; reports whether anything was actually appended.
        func appendSubsequence(end: Index) -> Bool {
            if subSequenceStart == end && omittingEmptySubsequences {
                return false
            }
            result.append(self[subSequenceStart..<end])
            return true
        }

        if maxSplits == 0 || isEmpty {
            _ = appendSubsequence(end: endIndex)
            return result
        }

        var subSequenceEnd = subSequenceStart
        let cachedEndIndex = endIndex
        while subSequenceEnd != cachedEndIndex {
            if try isSeparator(self[subSequenceEnd]) {
                let didAppend = appendSubsequence(end: subSequenceEnd)
                if includeSeparator {
                    // Keep the separator: the next subsequence begins at it.
                    subSequenceStart = subSequenceEnd
                    formIndex(after: &subSequenceEnd)
                } else {
                    // Drop the separator: the next subsequence begins just after it.
                    formIndex(after: &subSequenceEnd)
                    subSequenceStart = subSequenceEnd
                }
                if didAppend && result.count == maxSplits {
                    break
                }
                continue
            }
            formIndex(after: &subSequenceEnd)
        }

        // Emit the tail that follows the last split point.
        if subSequenceStart != cachedEndIndex || !omittingEmptySubsequences {
            result.append(self[subSequenceStart..<cachedEndIndex])
        }
        return result
    }
}
The result of test is:
// Split in front of every uppercase character, keeping that character in the following piece.
let splitedString = "NaCuHHe".split(includeSeparator: true) { $0.isUppercase }
print(splitedString) // ["Na", "Cu", "H", "He"]
Upvotes: 0
Reputation: 415
Using Regex -
/// Splits `s` in front of every ASCII capital letter by inserting a space there and
/// then cutting the result at spaces.
///
/// - Parameter s: The string to split.
/// - Returns: The space-separated components of the rewritten string.
func splitYourString(_ s: String) -> [String] {
    // Group 1 captures the run of lowercase letters, group 2 the capital letter that follows it.
    let capitalBoundary = try! NSRegularExpression(pattern: "([a-z]*)([A-Z])")
    let wholeString = NSRange(location: 0, length: s.utf16.count)
    let spaced = capitalBoundary.stringByReplacingMatches(in: s, range: wholeString, withTemplate: "$1 $2")
    // Trimming removes the space that ends up in front of a leading capital letter.
    let trimmed = spaced.trimmingCharacters(in: .whitespacesAndNewlines)
    return trimmed.components(separatedBy: " ")
}
print(splitYourString("NaCuHHe")) // prints ["Na", "Cu", "H", "He"]
Upvotes: 5
Reputation: 1
Answer in Swift 4
Another approach is to pop the last word (which starts with a capital letter) onto the front of the result list until no more capital letters are found in the given string.
extension String {
    /// Splits the string in front of every uppercase character.
    ///
    /// Words are peeled off from the end: each pass takes everything from the last uppercase
    /// character onward. Text in front of the first uppercase character becomes the first
    /// element. A string without uppercase characters (including the empty string) is
    /// returned as a single element.
    ///
    /// - Returns: The words in their original order.
    func splitWord() -> [String] {
        var words: [String] = []
        // A shrinking view of the string: every index used below belongs to the string it is
        // applied to, and nothing is copied until a word is emitted.
        var remainder = self[...]
        while let wordStart = remainder.lastIndex(where: { $0.isUppercase }) {
            words.append(String(remainder[wordStart...]))
            remainder = remainder[..<wordStart]
        }
        // Keep a lowercase prefix, and keep the whole input when no capital was found at all.
        if !remainder.isEmpty || words.isEmpty {
            words.append(String(remainder))
        }
        // Words were collected back to front; reversing once avoids repeated front insertion.
        return words.reversed()
    }
}
Upvotes: 0
Reputation: 120
dfri's answer, which is great, modified for Swift 4.
His Sequence extension is the same, but the helper extension and the way it is used are different:
extension Sequence {
    /// Splits the sequence in front of every element for which `isSeparator` returns `true`.
    ///
    /// Separators are kept: each one becomes the first element of the subsequence it starts.
    /// Empty subsequences are never produced, so an empty sequence yields an empty array.
    ///
    /// - Parameter isSeparator: Returns `true` for an element that should begin a new subsequence.
    /// - Returns: The non-empty subsequences, in order.
    /// - Throws: Rethrows any error thrown by `isSeparator`.
    func splitBefore(
        separator isSeparator: (Iterator.Element) throws -> Bool
    ) rethrows -> [AnySequence<Iterator.Element>] {
        var result: [AnySequence<Iterator.Element>] = []
        var subSequence: [Iterator.Element] = []
        for element in self {
            // A separator closes the run collected so far, unless there is nothing to close.
            if try isSeparator(element), !subSequence.isEmpty {
                result.append(AnySequence(subSequence))
                subSequence = []
            }
            subSequence.append(element)
        }
        // Flush the last run; it is empty only when the whole sequence was empty.
        if !subSequence.isEmpty {
            result.append(AnySequence(subSequence))
        }
        return result
    }
}
string extension for use:
extension String {
    /// `true` when lowercasing leaves the string unchanged.
    /// This is also the case for strings with nothing to lowercase, such as `""` or `"123"`.
    var isLowercase: Bool {
        return self == lowercased()
    }

    /// `true` when uppercasing leaves the string unchanged.
    /// This is also the case for strings with nothing to uppercase, such as `""` or `"123"`.
    var isUppercase: Bool {
        return self == uppercased()
    }
}
and then used as follows, since `characters` has been deprecated in Swift 4:
let teststring = "NaCuHHe"
// Cut in front of every uppercase character, then turn each run of characters back into a String.
let splitted = teststring.splitBefore { $0.isUppercase }.map { String($0) }
print(splitted) // ["Na", "Cu", "H", "He"]
Upvotes: 2
Reputation: 5190
A bit late to the party, but here's a straightforward Swift 3 approach using whitespace-delimiting. Might not be as elegant as the functional or iterator approaches, but it doesn't involve making any custom extensions on existing classes.
let originalString = "NaCuHHe"
var newStringArray: [String] = []
// Swift 4 and later: a String is iterated directly; the separate `characters` view
// used by the Swift 3 version is no longer available in Swift 5.
for character in originalString {
    let piece = String(character)
    // A character that equals its own uppercased form gets a space in front of it.
    if piece == piece.uppercased() {
        newStringArray.append(" ")
    }
    newStringArray.append(piece)
}
// Trimming drops the space in front of a leading capital before splitting on spaces.
let newString = newStringArray.joined().trimmingCharacters(in: .whitespacesAndNewlines).components(separatedBy: " ")
print(newString) // Returns ["Na", "Cu", "H", "He"]
Upvotes: 3
Reputation: 59506
A different solution in Functional Programming style
First of all, let's define an easy way to check whether a Character is uppercase
extension Character {
    /// `true` when uppercasing the character leaves it unchanged.
    /// Characters with no uppercase form of their own (for example digits) also report `true`.
    var isUppercase: Bool {
        let text = String(self)
        return text.uppercased() == text
    }
}
Next we need the indexes of the uppercase characters
// Offsets (counted in characters) of every uppercase character in `text`.
// The string is enumerated directly: since Swift 4 a String is a collection of Character,
// and the separate `characters` view is no longer available in Swift 5.
let indexes = Set(
    text
        .enumerated()
        .filter { $0.element.isUppercase }
        .map { $0.offset }
)
Now we can build the result
// Walk the characters of `text` and build the chunks: the first character, and every character
// whose offset is listed in `indexes`, starts a new chunk; any other character is appended to
// the current chunk. The string is mapped directly (the `characters` view is gone in Swift 5),
// and `reduce(into:)` mutates one array instead of copying it on every step.
let chunks = text
    .map { String($0) }
    .enumerated()
    .reduce(into: [String]()) { chunks, elm in
        if chunks.isEmpty || indexes.contains(elm.offset) {
            chunks.append(elm.element)
        } else {
            chunks[chunks.count - 1] += elm.element
        }
    }
["Na", "Cu", "H", "He"]
Upvotes: 6
Reputation: 73186
(Swift 3)
We could let ourselves be inspired by the implementation of the split function in Sequence, and implement our own splitBefore method (split before separator, omitting empty subsequences) that keeps the separators in the split sequence.
extension Sequence {
    /// Splits the sequence before each separator, keeping the separators and omitting empty
    /// subsequences.
    ///
    /// Every element for which `isSeparator` returns `true` becomes the first element of a new
    /// subsequence. An empty sequence produces an empty result.
    ///
    /// - Parameter isSeparator: Returns `true` for an element that should begin a new subsequence.
    /// - Returns: The non-empty subsequences, in order.
    /// - Throws: Rethrows any error thrown by `isSeparator`.
    func splitBefore(
        separator isSeparator: (Iterator.Element) throws -> Bool
    ) rethrows -> [AnySequence<Iterator.Element>] {
        var result: [AnySequence<Iterator.Element>] = []
        var subSequence: [Iterator.Element] = []

        var iterator = self.makeIterator()
        while let element = iterator.next() {
            if try isSeparator(element) {
                // Close the current run (if any) and start a new one with the separator itself.
                if !subSequence.isEmpty {
                    result.append(AnySequence(subSequence))
                }
                subSequence = [element]
            } else {
                subSequence.append(element)
            }
        }
        // The last run is empty only for an empty input; skipping it then honours
        // the "omitting empty subsequences" contract.
        if !subSequence.isEmpty {
            result.append(AnySequence(subSequence))
        }
        return result
    }
}
Used as follows
/* help property */
extension Character {
    /// `true` when the character is identical to its uppercased form.
    /// Characters with no uppercase form of their own (for example digits) also report `true`.
    var isUpperCase: Bool {
        let original = String(self)
        return original == original.uppercased()
    }
}
/* example usage */
let teststring = "NaCuHHe"
// Swift 3: go through the `characters` view, cut in front of every uppercase character,
// then turn each run of characters back into a String.
let splitted = teststring.characters
    .splitBefore { $0.isUpperCase }
    .map { String($0) }
print(splitted) // ["Na", "Cu", "H", "He"]
Upvotes: 11
Reputation: 21137
Another way would be:
let input = "NaCuHHe"
var result = [String]()
// Start of the piece currently being collected.
var lastIndex = input.startIndex
// Visit only the positions whose character differs from its lowercased form.
for index in input.indices where String(input[index]) != String(input[index]).lowercased() {
    // A capital at the very start has nothing in front of it to cut off.
    guard index != lastIndex else { continue }
    result.append(String(input[lastIndex..<index]))
    lastIndex = index
}
// Whatever follows the last cut point is the final piece.
result.append(String(input[lastIndex...]))
result is ["Na", "Cu", "H", "He"]
Upvotes: 3