//IsFetched(url string) bool
}
-var wg sync.WaitGroup
-
-var AlreadyFetched map[string]*Empty
-
-var mu sync.Mutex
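+// ParallelFetcher bundles the crawl state that previously lived in
+// package-level globals: wg tracks in-flight crawls, mu guards
+// AlreadyFetched (the set of already-visited URLs; Empty is assumed to be
+// an empty struct declared elsewhere in this file), and fetcher is the
+// Fetcher implementation to crawl with.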
+type ParallelFetcher struct {
+ wg sync.WaitGroup
+ mu sync.Mutex
+ AlreadyFetched map[string]*Empty
+ fetcher Fetcher
+}
type index struct {
maxDepth int
// Crawl uses fetcher to recursively crawl
// pages starting with url, to a maximum of depth.
-func Crawl(id index, url string, depth int, fetcher Fetcher) {
+func (p *ParallelFetcher) Crawl(id index, url string, depth int) {
- // TODO: Fetch URLs in parallel.
- // TODO: Don't fetch the same URL twice.
- // This implementation doesn't do either:
+ // URLs are fetched in parallel (one goroutine per child crawl) and
+ // AlreadyFetched ensures no URL is fetched twice.
- defer wg.Done()
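+ // The deferred Done pairs with the Add(1) performed by whoever launched
+ // this crawl: main for the root call, the parent Crawl for child goroutines.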
+ defer p.wg.Done()
if depth <= 0 {
return
}
fmt.Printf("Crawl() %q: started for %q\n", id, url)
- if _, ok := AlreadyFetched[url]; ok {
- fmt.Printf("Crawl() %q: Already fetched %q\n", id, url)
- return
- } else {
- mu.Lock()
- fmt.Printf("Crawl() %q: FETCHING %q\n", id, url)
- AlreadyFetched[url] = &Empty{}
- mu.Unlock()
- }
+ // Check and mark under the same lock: otherwise two goroutines can both
+ // miss in the map and fetch the same URL, and the unlocked map read would
+ // race with concurrent writes.
+ p.mu.Lock()
+ if _, ok := p.AlreadyFetched[url]; ok {
+ p.mu.Unlock()
+ fmt.Printf("Crawl() %q: Already fetched %q\n", id, url)
+ return
+ }
+ fmt.Printf("Crawl() %q: FETCHING %q\n", id, url)
+ p.AlreadyFetched[url] = &Empty{}
+ p.mu.Unlock()
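+ // The fetch itself runs outside the lock, so slow fetches in different
+ // goroutines can proceed concurrently.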
- body, urls, err := fetcher.Fetch(url)
+ body, urls, err := p.fetcher.Fetch(url)
if err != nil {
fmt.Println(err)
return
parentPref: id.parentPref + strconv.Itoa(id.funcNum),
}
for _, u := range urls {
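+ // Add before launching the goroutine so Wait in main cannot observe the
+ // counter drop to zero while child crawls are still being started.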
- wg.Add(1)
- go Crawl(childId, u, depth-1, fetcher)
+ p.wg.Add(1)
+ go p.Crawl(childId, u, depth-1)
childId.funcNum++
}
return
}
func main() {
- AlreadyFetched = make(map[string]*Empty)
- wg.Add(1)
+ p := ParallelFetcher{
+ AlreadyFetched: make(map[string]*Empty),
+ fetcher: goFetcher,
+ }
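+ // This Add is balanced by the deferred Done inside the synchronous
+ // top-level Crawl call below.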
+ p.wg.Add(1)
depth := 4
id := index{maxDepth: depth}
- Crawl(id, "https://golang.org/", depth, fetcher)
- wg.Wait()
+ p.Crawl(id, "https://golang.org/", depth)
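+ // Wait returns once every crawl goroutine has called its deferred Done.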
+ p.wg.Wait()
}
// fakeFetcher is Fetcher that returns canned results.
}
-// fetcher is a populated fakeFetcher.
-var fetcher = fakeFetcher{
+// goFetcher is a populated fakeFetcher.
+var goFetcher = fakeFetcher{
"https://golang.org/": &fakeResult{
"The Go Programming Language",
[]string{