From: sgf Date: Mon, 23 May 2022 15:32:53 +0000 (+0300) Subject: chg(tour): Group parallel fetcher variables into struct. X-Git-Url: https://gitweb.sgf-dma.tk/?a=commitdiff_plain;h=4fdf2f9f1d5c59219f46fa2c3f99892b96d51223;p=go.git chg(tour): Group parallel fetcher variables into struct. --- diff --git a/crawl/crawl b/crawl/crawl index e6caca3..b8d7de3 100755 Binary files a/crawl/crawl and b/crawl/crawl differ diff --git a/crawl/crawl.go b/crawl/crawl.go index 7b8512a..79b5ce9 100644 --- a/crawl/crawl.go +++ b/crawl/crawl.go @@ -18,11 +18,12 @@ type Fetcher interface { //IsFetched(url string) bool } -var wg sync.WaitGroup - -var AlreadyFetched map[string]*Empty - -var mu sync.Mutex +type ParallelFetcher struct { + wg sync.WaitGroup + mu sync.Mutex + AlreadyFetched map[string]*Empty + fetcher Fetcher +} type index struct { maxDepth int @@ -39,27 +40,27 @@ func (i index) String() string { // Crawl uses fetcher to recursively crawl // pages starting with url, to a maximum of depth. -func Crawl(id index, url string, depth int, fetcher Fetcher) { +func (p *ParallelFetcher) Crawl(id index, url string, depth int) { // TODO: Fetch URLs in parallel. // TODO: Don't fetch the same URL twice. // This implementation doesn't do either: - defer wg.Done() + defer p.wg.Done() if depth <= 0 { return } fmt.Printf("Crawl() %q: started for %q\n", id, url) - if _, ok := AlreadyFetched[url]; ok { + if _, ok := p.AlreadyFetched[url]; ok { fmt.Printf("Crawl() %q: Already fetched %q\n", id, url) return } else { - mu.Lock() + p.mu.Lock() fmt.Printf("Crawl() %q: FETCHING %q\n", id, url) - AlreadyFetched[url] = &Empty{} - mu.Unlock() + p.AlreadyFetched[url] = &Empty{} + p.mu.Unlock() } - body, urls, err := fetcher.Fetch(url) + body, urls, err := p.fetcher.Fetch(url) if err != nil { fmt.Println(err) return @@ -70,20 +71,22 @@ func Crawl(id index, url string, depth int, fetcher Fetcher) { parentPref: id.parentPref + strconv.Itoa(id.funcNum), } for _, u := range urls { - wg.Add(1) - go Crawl(childId, u, depth-1, fetcher) + p.wg.Add(1) + go p.Crawl(childId, u, depth-1) childId.funcNum++ } return } func main() { - AlreadyFetched = make(map[string]*Empty) - wg.Add(1) + p := ParallelFetcher{ AlreadyFetched: make(map[string]*Empty), + fetcher: goFetcher, + } + p.wg.Add(1) depth := 4 id := index{maxDepth: depth} - Crawl(id, "https://golang.org/", depth, fetcher) - wg.Wait() + p.Crawl(id, "https://golang.org/", depth) + p.wg.Wait() } // fakeFetcher is Fetcher that returns canned results. @@ -102,7 +105,7 @@ func (f fakeFetcher) Fetch(url string) (string, []string, error) { } // fetcher is a populated fakeFetcher. -var fetcher = fakeFetcher{ +var goFetcher = fakeFetcher{ "https://golang.org/": &fakeResult{ "The Go Programming Language", []string{