Andreas
Andreas

Reputation: 189

Split String in Swift by their capital letters

I want to split a simple String at its capital letters into an array. It should look something like this:

let teststring = "NaCuHHe"

and the result should be:

["Na", "Cu", "H", "He"]

I tried the following:

/// Returns only the characters of `s` that lie in the range "A"..."Z".
///
/// Every other character (including the lowercase letters that follow a
/// capital) is filtered out, so the result is a flat `[Character]` of capitals
/// rather than chunks such as "Na" or "Cu".
// NOTE(review): `characters` is the pre-Swift 4 character view — confirm the target toolchain.
func capitalLetters(s: String) -> [Character] {
    return s.characters.filter { ("A"..."Z").contains($0) }
}

I searched through the documentation and other websites but I did not find anything helpful. I'm at my wits' end. I don't know what else to try, since I'm really new to Swift. It still gives me only the capital letters, and I don't know how to change it so that it also gives me the characters that follow each capital letter.

Upvotes: 14

Views: 4832

Answers (11)

Olex
Olex

Reputation: 320

The simplest would be to use chunked(by:) from Swift Algorithms:

import Algorithms

// The closure receives each pair of adjacent characters; returning `true` keeps
// both in the same chunk, so a new chunk begins at every uppercase character.
let chunks = "HereIAmAgain".chunked { !$1.isUppercase }

Alternatively, you can search for indices of uppercased characters and then zip() adjacent indices into an array of substrings:

extension StringProtocol {
    /// Splits the string immediately before every uppercase character.
    ///
    /// The first character never triggers a split, so a leading lowercase run
    /// is kept as the first piece (`"abcDef"` gives `["abc", "Def"]`).
    ///
    /// - Returns: The pieces in order as subsequences that share storage with
    ///   the receiver, or an empty array when the receiver is empty.
    func splitUppercased() -> [SubSequence] {
        // Without this guard an empty receiver would yield a single empty piece.
        guard !isEmpty else { return [] }

        // Every uppercase position except the first character starts a new piece.
        let splitIndices = indices.dropFirst().filter { self[$0].isUppercase }

        // Pair each piece's start with the start of the next piece (or the end).
        return zip([startIndex] + splitIndices, splitIndices + [endIndex])
            .map { self[$0..<$1] }
    }
}

Upvotes: 0

Leo Dabus
Leo Dabus

Reputation: 236380

A little bit late for the party but I think it is still worth posting it.
A different approach using unfolded sequences:

extension Collection {
    /// Eagerly collects the slices produced by `unfoldedSequences(upTo:)` into an array.
    ///
    /// - Parameter predicate: Returns `true` for an element that should begin a new slice.
    /// - Returns: The slices of the collection, in order.
    func subSequences(upTo predicate: @escaping (Element) -> Bool) -> [SubSequence] {
        Array(unfoldedSequences(upTo: predicate))
    }

    /// Produces the slices of the collection one at a time.
    ///
    /// Each slice starts where the previous one ended and runs up to (but not
    /// including) the next element, after its first, that satisfies `predicate`.
    ///
    /// - Parameter predicate: Returns `true` for an element that should begin a new slice.
    /// - Returns: An `UnfoldSequence` whose state is the start index of the next slice.
    func unfoldedSequences(upTo predicate: @escaping (Element) -> Bool) -> UnfoldSequence<SubSequence, Index> {
        sequence(state: startIndex) { cursor in
            guard cursor < self.endIndex else { return nil }
            let sliceStart = cursor
            // Search strictly after the slice's first element so that element never ends its own slice.
            let remainder = self[self.index(after: sliceStart)...]
            cursor = remainder.firstIndex(where: predicate) ?? self.endIndex
            return self[sliceStart..<cursor]
        }
    }
}

Playground testing

let string = "NaCuHHe"
// Print each lazily produced slice on its own line.
string.unfoldedSequences(upTo: \.isUppercase).forEach { print($0) }
// Print all slices at once as an array.
print(string.subSequences(upTo: \.isUppercase))

This will print:

Na
Cu
H
He
["Na", "Cu", "H", "He"]

Upvotes: 0

quemeful
quemeful

Reputation: 9858

// Prefix every uppercase character with a space, glue the pieces back together,
// then split on spaces so that each chunk starts at an uppercase character.
// NOTE(review): spaces already present in `str` also act as separators.
str.map({ $0.isUppercase ? " \($0)" : "\($0)" }).joined().split(separator: " ")

Upvotes: 0

Luci Aghergheloaei
Luci Aghergheloaei

Reputation: 291

Answer in Swift 5.

Inspired by Apple's implementation of the split function in Collection.

What I did was change how the start index of each subsequence is chosen.

In original implementation, when you hit a separator:

  1. add a new subsequence to the array of subsequences
  2. form a new index after the separator
  3. attribute that index to next subsequence start

in my implementation:

  1. add a new subsequence to the array of subsequences
  2. attribute current index to next subsequence start
  3. form a new index after the separator

    extension Collection {
        /// Splits the collection at the elements matched by `isSeparator`,
        /// optionally keeping each separator as the first element of the
        /// subsequence that follows it.
        ///
        /// - Parameters:
        ///   - maxSplits: Splitting stops once this many subsequences have been
        ///     collected; the remainder of the collection becomes the last one.
        ///   - omittingEmptySubsequences: When `true`, empty subsequences are
        ///     left out of the result.
        ///   - includeSeparator: When `true`, the next subsequence starts at
        ///     the separator itself; when `false`, it starts after it.
        ///   - isSeparator: Returns `true` for an element at which to split.
        /// - Returns: The subsequences, in order.
        /// - Throws: Rethrows any error thrown by `isSeparator`.
        @inlinable
        public __consuming func split(
            maxSplits: Int = Int.max,
            omittingEmptySubsequences: Bool = true,
            includeSeparator: Bool = false,
            whereSeparator isSeparator: (Element) throws -> Bool
        ) rethrows -> [SubSequence] {
            var result: [SubSequence] = []
            var subSequenceStart: Index = startIndex

            // Appends `subSequenceStart..<end` unless it is empty and empty
            // subsequences are being omitted; reports whether it appended.
            func appendSubsequence(end: Index) -> Bool {
                if subSequenceStart == end && omittingEmptySubsequences {
                    return false
                }
                result.append(self[subSequenceStart..<end])
                return true
            }

            if maxSplits == 0 || isEmpty {
                _ = appendSubsequence(end: endIndex)
                return result
            }

            var subSequenceEnd = subSequenceStart
            let cachedEndIndex = endIndex
            while subSequenceEnd != cachedEndIndex {
                if try isSeparator(self[subSequenceEnd]) {
                    let didAppend = appendSubsequence(end: subSequenceEnd)
                    if includeSeparator {
                        // Record the start before advancing so the separator
                        // belongs to the next subsequence.
                        subSequenceStart = subSequenceEnd
                        formIndex(after: &subSequenceEnd)
                    } else {
                        // Advance first so the separator is dropped.
                        formIndex(after: &subSequenceEnd)
                        subSequenceStart = subSequenceEnd
                    }

                    if didAppend && result.count == maxSplits {
                        break
                    }
                    continue
                }
                formIndex(after: &subSequenceEnd)
            }

            // Flush whatever follows the last split point.
            if subSequenceStart != cachedEndIndex || !omittingEmptySubsequences {
                result.append(self[subSequenceStart..<cachedEndIndex])
            }

            return result
        }
    }
    

    The result of the test is:

       // Keep each uppercase separator as the first character of the piece it starts.
       let splitString = "NaCuHHe".split(includeSeparator: true) { $0.isUppercase }
       print(splitString) // ["Na", "Cu", "H", "He"]
    

Upvotes: 0

Abhishek singh
Abhishek singh

Reputation: 415

Using Regex -

/// Splits `s` into pieces that each begin at an ASCII capital letter (A–Z).
///
/// Any text before the first capital letter is returned as its own leading
/// piece. The pieces are taken directly from regex matches, so whitespace
/// inside `s` is preserved and never produces extra or empty elements.
///
/// - Parameter s: The string to split.
/// - Returns: The pieces in order; an empty array when `s` is empty.
func splitYourString(_ s: String) -> [String] {
    // Either a leading run without capitals, or one capital followed by everything up to the next capital.
    // The pattern is a constant, so failing to compile it is a programmer error.
    let regex = try! NSRegularExpression(pattern: "^[^A-Z]+|[A-Z][^A-Z]*")
    // NSRegularExpression reports UTF-16 ranges, so slice through NSString.
    let text = NSString(string: s)
    let fullRange = NSRange(location: 0, length: text.length)
    return regex.matches(in: s, range: fullRange).map { text.substring(with: $0.range) }
}
print(splitYourString("NaCuHHe"))     //["Na", "Cu", "H", "He"]

Upvotes: 5

Jacky Tay
Jacky Tay

Reputation: 1

Answer in Swift 4

Another approach is to pop the last word (which starts with a capital letter) onto the front of the result list until no more capital characters are found in the given string.

extension String {
    /// Splits the string into words that each begin at an uppercase character.
    ///
    /// Words are peeled off the end of the string one at a time. Text before
    /// the first uppercase character becomes the first element. A string with
    /// no uppercase character is returned unchanged as the only element.
    ///
    /// - Returns: The words in their original order.
    func splitWord() -> [String] {
        // Work on a Substring so indices stay valid while the tail is trimmed.
        var remainder = Substring(self)
        // Collected back-to-front; appending avoids repeated front insertions.
        var reversedWords: [String] = []

        while let cut = remainder.lastIndex(where: { $0.isUppercase }) {
            reversedWords.append(String(remainder[cut...]))
            remainder = remainder[..<cut]
        }

        // Keep a leading run without capitals, or the whole input when no capital was found at all.
        if !remainder.isEmpty || reversedWords.isEmpty {
            reversedWords.append(String(remainder))
        }
        return reversedWords.reversed()
    }
}

Upvotes: 0

Joyful Machines
Joyful Machines

Reputation: 120

dfri's answer, which is great, modified for Swift 4.

His Sequence extension is the same, but the extensions used with it are different:

    extension Sequence {
        /// Splits the sequence immediately before every element matched by
        /// `isSeparator`, keeping each separator as the first element of the
        /// chunk it starts.
        ///
        /// Empty chunks are never produced; an empty sequence yields an empty array.
        ///
        /// - Parameter isSeparator: Returns `true` for an element that begins a new chunk.
        /// - Returns: The chunks, in order.
        /// - Throws: Rethrows any error thrown by `isSeparator`.
        func splitBefore(
            separator isSeparator: (Element) throws -> Bool
        ) rethrows -> [AnySequence<Element>] {
            var chunks: [AnySequence<Element>] = []
            var current: [Element] = []

            for element in self {
                // A separator closes the chunk collected so far (if any) and then opens the next one.
                if try isSeparator(element), !current.isEmpty {
                    chunks.append(AnySequence(current))
                    current.removeAll()
                }
                current.append(element)
            }

            // Flush the trailing chunk, but never emit an empty one (the empty-input case).
            if !current.isEmpty {
                chunks.append(AnySequence(current))
            }
            return chunks
        }
    }

string extension for use:

extension String {

    /// `true` when lowercasing the string leaves it unchanged.
    ///
    /// Strings without any cased characters (for example `""` or `"123"`)
    /// therefore also report `true`.
    var isLowercase: Bool {
        return self == self.lowercased()
    }

    /// `true` when uppercasing the string leaves it unchanged.
    ///
    /// Strings without any cased characters (for example `""` or `"123"`)
    /// therefore also report `true`.
    var isUppercase: Bool {
        return self == self.uppercased()
    }
}

and then used as follows, since characters has been deprecated in Swift 4:

    let teststring = "NaCuHHe"
    // Start a new chunk at every uppercase character, then turn each chunk back into a String.
    let pieces = teststring.splitBefore { $0.isUppercase }
    let splitted = pieces.map { String($0) }
    print(splitted) // ["Na", "Cu", "H", "He"]

Upvotes: 2

ArtSabintsev
ArtSabintsev

Reputation: 5190

A bit late to the party, but here's a straightforward Swift 3 approach using whitespace-delimiting. Might not be as elegant as the functional or iterator approaches, but it doesn't involve making any custom extensions on existing classes.

    // Input to split; each chunk is expected to start with a capital letter.
    let originalString = "NaCuHHe"
    // Collects single-character strings, with a " " marker inserted before each uppercase character.
    var newStringArray: [String] = []
    // NOTE(review): `characters` is the pre-Swift 4 character view — confirm the target toolchain.
    for character in originalString.characters {
        // A character counts as uppercase when uppercasing leaves it unchanged,
        // so characters without case (digits, punctuation) also start a new chunk.
        if String(character) == String(character).uppercased() {
            newStringArray.append(" ")
        }
        newStringArray.append(String(character))
    }

    // Join everything, trim surrounding whitespace (which removes the leading marker),
    // then cut at the remaining markers.
    // NOTE(review): spaces already present in the input are indistinguishable from the inserted markers.
    let newString = newStringArray.joined().trimmingCharacters(in: .whitespacesAndNewlines).components(separatedBy: " ")
    print(newString) // Returns ["Na", "Cu", "H", "He"]

Upvotes: 3

Luca Angeletti
Luca Angeletti

Reputation: 59506

A different solution in Functional Programming style

isUppercase

First of all, let's define an easy way to check whether a Character is uppercase

extension Character {
    /// `true` when uppercasing the character's string form leaves it unchanged.
    var isUppercase: Bool {
        let text = String(self)
        return text.uppercased() == text
    }
}

Indexes

Next we need the indexes of the uppercase characters

// Set of zero-based offsets of every character in `text` for which `isUppercase` is true.
// NOTE(review): `characters` is the pre-Swift 4 character view — confirm the target toolchain.
let indexes = Set(text
    .characters
    .enumerated()
    .filter { $0.element.isUppercase }
    .map { $0.offset })

Building the result

Now we can build the result

// Walk the characters of `text` in order, growing a list of chunks.
// NOTE(review): `characters` is the pre-Swift 4 character view — confirm the target toolchain.
let chunks = text
    .characters
    .map { String($0) }
    .enumerated()
    .reduce([String]()) { chunks, elm -> [String] in
        // The very first character always opens the first chunk.
        guard !chunks.isEmpty else { return [elm.element] }
        // A character whose offset is in `indexes` opens a new chunk.
        guard !indexes.contains(elm.offset) else { return chunks + [String(elm.element)] }

        // Otherwise the character is appended to the chunk currently being built.
        var chunks = chunks
        chunks[chunks.count-1] += String(elm.element)
        return chunks
    }

Output

["Na", "Cu", "H", "He"]

Upvotes: 6

dfrib
dfrib

Reputation: 73186

(Swift 3)

We could let ourselves be inspired by the implementation of the split function in Sequence, and implement our own splitBefore method (split before separator, omitting empty subsequences) that keeps the separators in the split sequence.

extension Sequence {
    /// Splits the sequence immediately before every element matched by
    /// `isSeparator`, keeping each separator as the first element of the
    /// subsequence it starts.
    ///
    /// Empty subsequences are omitted, so an empty sequence yields an empty array.
    ///
    /// - Parameter isSeparator: Returns `true` for an element that begins a new subsequence.
    /// - Returns: The subsequences, in order.
    /// - Throws: Rethrows any error thrown by `isSeparator`.
    func splitBefore(
        separator isSeparator: (Iterator.Element) throws -> Bool
    ) rethrows -> [AnySequence<Iterator.Element>] {
        var result: [AnySequence<Iterator.Element>] = []
        var subSequence: [Iterator.Element] = []

        var iterator = self.makeIterator()
        while let element = iterator.next() {
            if try isSeparator(element) {
                // Close the subsequence collected so far, unless nothing precedes this separator.
                if !subSequence.isEmpty {
                    result.append(AnySequence(subSequence))
                }
                // The separator itself opens the next subsequence.
                subSequence = [element]
            }
            else {
                subSequence.append(element)
            }
        }
        // Flush the trailing subsequence, but never emit an empty one (the empty-input case).
        if !subSequence.isEmpty {
            result.append(AnySequence(subSequence))
        }
        return result
    }
}

Used as follows

/* help property */
extension Character {
    /// `true` when the character's string form equals its uppercased form.
    var isUpperCase: Bool {
        let text = String(self)
        return text == text.uppercased()
    }
}

/* example usage */  
// NOTE(review): `characters` is the pre-Swift 4 character view — confirm the target toolchain.
let teststring = "NaCuHHe"
let splitted = teststring
    .characters
    .splitBefore(separator: { $0.isUpperCase }) // start a new chunk at every uppercase character
    .map{String($0)} // turn each chunk of characters back into a String
print(splitted) // ["Na", "Cu", "H", "He"]

Upvotes: 11

Daniel Klöck
Daniel Klöck

Reputation: 21137

Another way would be:

// NOTE(review): `enumerate()`, `lowercaseString`, `advancedBy` and `substringWithRange` look like Swift 2-era spellings — confirm the target toolchain.
let input = "NaCuHHe"

var result = [String]()
// Offset at which the chunk currently being collected starts.
var lastIndex = 0
// Visit only characters that differ from their lowercased form, i.e. uppercase letters.
for (index, letter) in input.characters.enumerate() where String(letter) != String(letter).lowercaseString {
    // An uppercase letter at the current chunk start (e.g. the very first character) closes nothing.
    guard index != lastIndex else { continue }
    // Emit the chunk that ends just before this uppercase letter.
    result += [input.substringWithRange(input.startIndex.advancedBy(lastIndex) ..< input.startIndex.advancedBy(index))]
    lastIndex = index
}
// Emit the final chunk, which runs to the end of the input.
result += [input.substringWithRange(input.startIndex.advancedBy(lastIndex) ..< input.endIndex)]

result is ["Na", "Cu", "H", "He"]

Upvotes: 3

Related Questions