Reputation: 139
What's the correct way to find the longest substring match? I’ve been trying to find the longest substring match with the regexp package in Go: https://pkg.go.dev/regexp
Specifically the longest consecutive match of vowels in a string. Any length or combination of "aeiou".
Here's the code I have now. It works until the strings get too long resulting in a panic (over 1000). I start searching for the longest substring and return once a match is found.
s := "chrononhotonthuooaos"
for i := utf8.RuneCountInString(s); i >=0; i-- {
exp := "[aeiou]{"
exp += fmt.Sprint(i)
exp += "}"
regexStr := regexp.MustCompile(exp)
longestStr := regexStr.FindStringSubmatch(s)
if longestStr != nil {
fmt.Println("longest submatch found:", longestStr)
return utf8.RuneCountInString(longestStr[0])
}
}
return 0
Upvotes: 1
Views: 1204
Reputation: 74227
The reason you are getting panics is because your are creating a new regex on every iteration — a 10,000 character string? That's 10,000 compiled regexen, and I'm pretty sure those get cached. Not to mention that regex compilation is expensive.
So why use a regex at all? Something like this is almost certainly faster, doesn't create any intermediate objects, and runs in O(n) time and O(1) space complexity. It will work for strings of any length whatsoever:
https://goplay.tools/snippet/nvZwWeEF8bC
func longest_vowel_run(s string) (string, int, int) {
slen := len(s)
bgn := 0
end := 0
max := 0
x := 0
y := 0
// while we still have chars left...
for x < len(s) {
// find the first vowel
for x < slen && !vowel[s[x]] {
x++
}
// find the next non-vowel
for y = x; y < slen && vowel[s[y]]; {
y++
}
// if the current run is longer than the max run, update the max run
if z := y - x; z > max {
bgn = x
end = y
max = z
}
// pick up where we left off
x = y
}
var maxRun string
if max > 0 {
maxRun = s[bgn:end]
}
return maxRun, bgn, end - 1
}
var vowel = map[byte]bool{
'a': true, 'A': true,
'e': true, 'E': true,
'i': true, 'I': true,
'o': true, 'O': true,
'u': true, 'U': true,
}
Upvotes: 1
Reputation: 165
Here is my answer. And although it is not as elegant as that of the companions, it works for me. https://go.dev/play/p/ZpZtqt92lvc
package main
import (
"fmt"
"regexp"
)
func main() {
word := "chrononhotonthuooaos"
//word := "aeoihddkdhuaiahdnaanbcvuuooaaiieendjahaaaiiie"
// only vowels allowed (one or more)
re := regexp.MustCompile(`[aeiou]+`)
// to store max length slice of vowel macthes
max_length := 0
// position of the max lenghth slice inside `vowels_matches`
position := 0
// type ~~~~> [][]byte{} all coincident slices inside another
vowels_matches := re.FindAll([]byte(word), -1)
for i := 0; i < len(vowels_matches); i++ {
// Looking for the longest slice with vowels
// so we are going to compare its lengths
if len(vowels_matches[i]) > max_length {
//if we find an slice longer max_length will take its value
max_length = len(vowels_matches[i])
// also we want to know the position, to pick after that slice in concret
position = i
}
}
// get that slice of bytes (the longest) an convert to string
result := string(vowels_matches[position])
fmt.Printf("%v type ~~~> %T\n", result, result)
}
Another option without using regex and with the help of little recursive function:https://go.dev/play/p/7sssSWv-0UL
package main
import (
"fmt"
)
func main() {
word := "chrononhotonthuooaos"
pattern := "aeiou"
// we only want all vowels inside `word` variable
// so `vowels` will append the positions inside `word`
vowels := []int{}
// slice of slices of vowel matches
vowels_matches := [][]int{}
// to store max length of vowel macthes slices
max_length := 0
// position of the max lenghth slice inside `vowels_matches`
position := 0
final_string_result := ""
// append only vowels from `word`
for i, vowel := range word {
for _, v := range pattern {
if v == vowel {
vowels = append(vowels, i) //append the position of that vowel
}
}
}
vowels_matches = recursive(int(vowels[0]), vowels, vowels_matches)
for i := 0; i < len(vowels_matches); i++ {
if len(vowels_matches[i]) > max_length {
max_length = len(vowels_matches[i])
position = i
}
}
for _, e := range vowels_matches[position] {
final_string_result += string(word[e])
}
fmt.Println(final_string_result)
}
func recursive(valid int, vowels []int, vowels_matches [][]int) [][]int {
temp := []int{}
for i, value := range vowels {
if value == valid+1 || i == 0 {
valid = value
temp = append(temp, value)
} else {
next_last := int(vowels[i:][0])
new_slice := vowels[i:]
vowels_matches = recursive(next_last, new_slice, vowels_matches)
}
}
vowels_matches = append(vowels_matches, temp)
return vowels_matches
}
Upvotes: 0
Reputation: 38213
You can do the following:
re := regexp.MustCompile(`[aeiou]+`)
matches := re.FindAllStringSubmatch(s, -1)
if len(matches) > 0 {
longest := 0
for i := range matches {
if len(matches[i]) >= len(matches[longest]) {
longest = i
}
}
fmt.Println(matches[longest])
}
https://go.dev/play/p/loxIsR3LMOM
Or, without regexp, you can do:
s := "chrononhotonthuooaos"
var start, end int
for i := 0; i < len(s); i++ {
switch s[i] {
case 'a', 'e', 'i', 'o', 'u':
j := i + 1
for ; j < len(s); j++ {
switch s[j] {
case 'a', 'e', 'i', 'o', 'u':
continue
}
break
}
if j-i > end-start {
start, end = i, j
}
i = j
}
}
fmt.Println(s[start:end])
https://go.dev/play/p/XT5-pr3n0tl
Upvotes: 0