Reputation: 11
I've been given a task: search for URLs in a text file using a regex and goroutines with a WaitGroup, in the following way: the text should be divided between N workers (goroutines), each goroutine searches for `https://`, the goroutines are in a WaitGroup, and the final result should be a single slice of strings (the URLs) from all goroutines together.
I'm working with a .txt file that has dozens of entries in one long string, including URLs. Right now I know how to extract a slice of URLs from the text, but without dividing the text between goroutines...
package main

import (
	"fmt"
	"os"
	"regexp"
	"sync"
	"time"
)

func Parser1(wg *sync.WaitGroup) {
	defer wg.Done()
	time.Sleep(1 * time.Second)
	b, err := os.ReadFile("repitations")
	if err != nil {
		fmt.Print(err)
		return
	}
	str := string(b)
	re := regexp.MustCompile(`(?:https?://)?(?:[^/.]+\.)*google\.com(?:/[^/\s]+)*/?`)
	fmt.Printf("%q\n", re.FindAllString(str, -1))
}

func Parser2(wg *sync.WaitGroup) {
	defer wg.Done()
	time.Sleep(1 * time.Second)
	b, err := os.ReadFile("repitations")
	if err != nil {
		fmt.Print(err)
		return
	}
	str := string(b)
	re := regexp.MustCompile(`(?:https?://)?(?:[^/.]+\.)*google\.com(?:/[^/\s]+)*/?`)
	fmt.Printf("%q\n", re.FindAllString(str, -1))
}

func main() {
	var wg sync.WaitGroup
	wg.Add(2)
	go Parser1(&wg)
	go Parser2(&wg)
	wg.Wait()
	fmt.Println("Well done!")
}
Upvotes: 0
Views: 178
Reputation: 56
Split your read process.
Open the file with os.Open() and read chunks at specific positions with file.ReadAt().
Pass the length to read and the offset from the start of the file to Parser():
func Parser(wg *sync.WaitGroup, f *os.File, length int64, offset int64) {
	defer wg.Done()
	// Read `length` bytes starting `offset` bytes from the beginning of the file.
	content := make([]byte, length)
	_, err := f.ReadAt(content, offset)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("%s", content)
	....
}
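To tie this together, here is a minimal sketch of a main that splits the file into N chunks by offset and collects every match into one slice, which is what the task asks for. The worker function, the chunk count, the simplified URL regex, and the results channel are my own choices for illustration, not part of the answer above; also note that this naive split can cut a URL in half at a chunk boundary, so a real implementation would overlap chunks or align them to whitespace.

package main

import (
	"io"
	"log"
	"os"
	"regexp"
	"sync"
)

// A simplified URL pattern for illustration; substitute your own regex.
var re = regexp.MustCompile(`https?://\S+`)

// worker scans one chunk of the file and sends the URLs it finds.
func worker(wg *sync.WaitGroup, f *os.File, length, offset int64, out chan<- []string) {
	defer wg.Done()
	content := make([]byte, length)
	if _, err := f.ReadAt(content, offset); err != nil && err != io.EOF {
		log.Print(err)
		return
	}
	out <- re.FindAllString(string(content), -1)
}

func main() {
	const n = 4 // number of workers

	f, err := os.Open("repitations")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	fi, err := f.Stat()
	if err != nil {
		log.Fatal(err)
	}
	size := fi.Size()
	chunk := size / n

	out := make(chan []string, n)
	var wg sync.WaitGroup
	for i := int64(0); i < n; i++ {
		length := chunk
		if i == n-1 {
			length = size - i*chunk // last worker takes the remainder
		}
		wg.Add(1)
		go worker(&wg, f, length, i*chunk, out)
	}
	wg.Wait()  // ReadAt is safe for concurrent use: it never touches the shared file offset
	close(out) // all workers are done, so the collector loop below terminates

	var urls []string // final result: URLs from all goroutines together
	for batch := range out {
		urls = append(urls, batch...)
	}
	log.Printf("%q", urls)
}

Because the channel is buffered with capacity n and each worker sends exactly one batch, every send completes without blocking, so it is safe to wg.Wait() before draining the channel.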
Upvotes: 0