Reputation: 11853
I am using Swift 3 and trying to access captured groups.
// Pattern: one of the keyword phrases, then any text (including newlines),
// then an amount written as " <digits>", a run of "." or "-", and two final digits.
// Counting opening parentheses from the left, the trailing (\d\d) is capture group 8.
let regexp = "((ALREADY PAID | NOT ALR | PROVIDER MAY | READY | MAY BILL | BILL YOU | PAID)((.|\\n)*))(( \\d+)(\\.+|-+)(\\d\\d))"
// check if some substring is in the recognized text
// range(of:options:) yields only the range of the whole match, not the individual capture groups.
if let range = stringText.range(of:regexp, options: .regularExpression) {
// NOTE(review): the range is computed on stringText but applied to tesseract.recognizedText — presumably the same text; confirm.
let result = tesseract.recognizedText.substring(with:range)
}
I want to extract the last two digits captured (the `(\d\d)` group), so if the text was `ALREADY PAID asfasdfadsfasdf 39.15`, it would extract `15`. Here is a regex builder that shows what I want. Normally, I would be able to use `$8` to get the 8th captured group, but I don't know how to do that in Swift 3.
Upvotes: 25
Views: 20425
Reputation: 12582
This pasta may save someone some time.
This example collects the value of the capture group from EVERY match, for the simple case where the pattern contains JUST ONE capture group, which is a common use case.
Example:
input: "blah blah height=3 blh height=13 height=7 blah"
regex: height=(\d+)
result: the strings ["3", "13", "7"]
So:
extension String {
    /// Collects the first capture group of every match of `regex` in this string.
    ///
    /// Example: `"height=3 height=13".collectEmAll("height=(\\d+)")` returns `["3", "13"]`.
    ///
    /// - Parameter regex: An `NSRegularExpression` pattern, expected to contain at least one capture group.
    /// - Returns: The text of capture group 1 for each match, in order of appearance.
    ///   Matches in which group 1 is missing or did not participate are skipped.
    ///   An invalid pattern yields an empty array.
    func collectEmAll(_ regex: String) -> [String] {
        // The signature is non-throwing, so an invalid pattern produces no results
        // rather than crashing the process through `try!`.
        guard let rx = try? NSRegularExpression(pattern: regex) else { return [] }
        // NSRange is measured in UTF-16 code units; building it from the string's
        // own indices keeps the search range correct for any content.
        let wholeString = NSRange(startIndex..., in: self)
        return rx.matches(in: self, range: wholeString).compactMap { match -> String? in
            // Index 0 is the whole match; index 1 only exists when the pattern
            // actually declares a capture group.
            guard match.numberOfRanges > 1 else { return nil }
            return substring(with: match.range(at: 1))
        }
    }

    /// Returns the part of this string covered by `nsrange`, or `nil` when the
    /// range cannot be converted into a range of this string.
    func substring(with nsrange: NSRange) -> String? {
        guard let range = Range(nsrange, in: self) else { return nil }
        return String(self[range])
    }
}
Don't forget that in the idiotic `range(at:)` system, index 0 is the "whole thing" (the entire match) and index 1 is the first capture group, which is what you typically want, hence the "1" above.
test
// Prints ["3", "13", "7"]. No whitespace is allowed between the "." and the method name.
print("blah height=3 blah height=13 height=7".collectEmAll("height=(\\d+)"))
Wherever possible, use the new regex features in the latest iOS instead.
Upvotes: 0
Reputation: 7238
A slightly altered version based on @Vyacheslav's answer with different error handling approach:
/// Errors that can be thrown while extracting capture groups from a string.
enum ParsingError: Error {
// You can pass more info here with parameter(s) if you want, e.g. `case let invalidRange(originalString, failedAtRange)`
/// A range reported by the regex match could not be converted into a range of the searched string.
case invalidRange
}
/// Adopted by types that need access to a `StringUtility` value.
protocol StringUtilityRequired {
/// The string utility made available to the conforming type.
var stringUtility: StringUtility { get }
}
/// Default implementation, so conforming types get a `stringUtility` without declaring one themselves.
extension StringUtilityRequired {
/// Returns a newly created `StringUtility` on each access.
var stringUtility: StringUtility { StringUtility() }
}
enum StringUtility {
    // This component is stateless; it doesn't have any side effect
    case pure

    /// Creates the single `pure` value.
    init() { self = .pure }

    /// Returns, for every match of `pattern` in `str`, the text of the whole
    /// match (index 0) followed by the text of each capture group.
    ///
    /// - Parameters:
    ///   - str: The string to search.
    ///   - pattern: An `NSRegularExpression` pattern.
    /// - Returns: One inner array per match, in the order the matches were found.
    /// - Throws: The error raised by `NSRegularExpression` for an invalid pattern, or
    ///   `ParsingError.invalidRange` when a reported range cannot be converted
    ///   into a range of `str`.
    func groups(_ str: String, pattern: String) throws -> [[String]] {
        let expression = try NSRegularExpression(pattern: pattern)
        let searchRange = NSRange(str.startIndex..<str.endIndex, in: str)

        var collected: [[String]] = []
        for hit in expression.matches(in: str, range: searchRange) {
            var captures: [String] = []
            for groupIndex in 0 ..< hit.numberOfRanges {
                // Any range that cannot be mapped back onto `str` aborts the whole call.
                guard let bounds = Range(hit.range(at: groupIndex), in: str) else {
                    throw ParsingError.invalidRange
                }
                captures.append(String(str[bounds]))
            }
            collected.append(captures)
        }
        return collected
    }
}
Usage:
/// Example consumer that obtains its `StringUtility` through `StringUtilityRequired`.
struct MyComponent: StringUtilityRequired {
    /// Splits "Test 123" around its whitespace with a regex and prints the groups found.
    /// - Throws: Whatever `groups(_:pattern:)` throws.
    func myFunc() throws {
        // The backslash must be doubled: "\s" is not a valid escape sequence in a
        // Swift string literal, so the regex class \s has to be written as "\\s".
        let groups = try stringUtility.groups("Test 123", pattern: "(.+)\\s(.+)")
        print(groups)
    }
}
Upvotes: 0
Reputation: 27211
Swift 4, Swift 5
extension String {
    /// Finds every match of `regexPattern` in this string and returns its groups.
    ///
    /// - Parameter regexPattern: An `NSRegularExpression` pattern.
    /// - Returns: One inner array per match: index 0 is the whole match and the
    ///   following entries are the capture groups. A group whose range cannot be
    ///   converted into a range of this string is reported as `""`. When the
    ///   pattern is invalid, a message is printed and the result is empty.
    func groups(for regexPattern: String) -> [[String]] {
        let expression: NSRegularExpression
        do {
            expression = try NSRegularExpression(pattern: regexPattern)
        } catch {
            print("invalid regex: \(error.localizedDescription)")
            return []
        }

        let searchRange = NSRange(startIndex..<endIndex, in: self)
        var collected: [[String]] = []
        for hit in expression.matches(in: self, range: searchRange) {
            var captures: [String] = []
            for groupIndex in 0..<hit.numberOfRanges {
                if let bounds = Range(hit.range(at: groupIndex), in: self) {
                    captures.append(String(self[bounds]))
                } else {
                    // Keep the slot so group indices stay aligned with the pattern.
                    captures.append("")
                }
            }
            collected.append(captures)
        }
        return collected
    }
}
example:
// The pattern requires a space after the letters, so the final "3string" (which ends the input) is not matched.
let res = "1my 2own 3string".groups(for:"(([0-9]+)[a-z]+) ")
(lldb) po res
▿ 2 elements
  ▿ 0 : 3 elements
    - 0 : "1my "
    - 1 : "1my"
    - 2 : "1"
  ▿ 1 : 3 elements
    - 0 : "2own "
    - 1 : "2own"
    - 2 : "2"
Upvotes: 43
Reputation: 10426
As ever, a simple extension seems to be the way around swift's bizarre overcomplication...
extension NSTextCheckingResult {
    /// Returns the text covered by every range of this match, read from `testedString`.
    ///
    /// - Parameter testedString: The string that was searched to produce this result.
    /// - Returns: The whole match at index 0 followed by one entry per capture group.
    ///   A group whose range cannot be converted into a range of `testedString`
    ///   (for example an optional group that did not take part in the match) is
    ///   reported as `""`, so group indices stay aligned with the pattern.
    func groups(testedString:String) -> [String] {
        return (0 ..< numberOfRanges).map { index -> String in
            // Range(_:in:) is nil for such groups; force-unwrapping it would crash.
            guard let bounds = Range(range(at: index), in: testedString) else { return "" }
            return String(testedString[bounds])
        }
    }
}
Use it like this:
// NSRange is measured in UTF-16 code units, so it is derived from the string's indices;
// someString.count counts Characters and yields the wrong length for text such as emoji.
if let match = myRegex.firstMatch(in: someString, range: NSRange(someString.startIndex..., in: someString)) {
    let groups = match.groups(testedString: someString)
    //... do something with groups
}
Upvotes: 8
Reputation: 534893
but I don't know how to do that in Swift 3.
When you receive a match from NSRegularExpression, what you get is an NSTextCheckingResult. You call `rangeAt(_:)` on it to get the range of a specific capture group.
Example:
let s = "hey ho ha"
let pattern = "(h).*(h).*(h)"
// our goal is capture group 3, "h" in "ha"
// try! crashes if the pattern is invalid; here the pattern is a fixed literal.
let regex = try! NSRegularExpression(pattern: pattern)
// The search range is given as an NSRange whose length is the string's UTF-16 count.
let result = regex.matches(in:s, range:NSMakeRange(0, s.utf16.count))
// Index 0 would be the whole match; 1 through 3 are the capture groups.
// This is the Swift 3 spelling; the Swift 4/5 code elsewhere on this page writes it as range(at:).
let third = result[0].rangeAt(3) // <-- !!
third.location // 7
third.length // 1
Upvotes: 38