chg(tour): Group parallel fetcher variables into a struct.
author    sgf <sgf.dma@gmail.com>    Mon, 23 May 2022 15:32:53 +0000 (18:32 +0300)
committer sgf <sgf.dma@gmail.com>    Mon, 23 May 2022 15:32:53 +0000 (18:32 +0300)
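Move the package-level wg, mu and AlreadyFetched variables into a
ParallelFetcher struct together with the Fetcher they drive, and turn
Crawl() into a method on it. The receiver is a pointer, because
sync.WaitGroup and sync.Mutex must not be copied after first use;
bundling the state also lets several independent crawlers coexist.

While here, hold the mutex across the AlreadyFetched lookup as well,
so the check-and-mark is atomic and the map is never read while
another goroutine writes to it. The fake fetcher variable is renamed
to goFetcher, freeing the shorter name for the struct field.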
crawl/crawl
crawl/crawl.go

diff --git a/crawl/crawl b/crawl/crawl
index e6caca3..b8d7de3 100755 (executable)
Binary files a/crawl/crawl and b/crawl/crawl differ
diff --git a/crawl/crawl.go b/crawl/crawl.go
index 7b8512a..79b5ce9 100644 (file)
@@ -18,11 +18,13 @@ type Fetcher interface {
         //IsFetched(url string) bool
 }
 
-var wg sync.WaitGroup
-
-var AlreadyFetched map[string]*Empty
-
-var mu sync.Mutex
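+// ParallelFetcher crawls pages concurrently while remembering which URLs were already fetched.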
+type ParallelFetcher struct {
+    wg             sync.WaitGroup
+    mu             sync.Mutex
+    AlreadyFetched map[string]*Empty
+    fetcher        Fetcher
+}
 
 type index struct {
     maxDepth int
@@ -39,27 +41,28 @@ func (i index) String() string {
 
 // Crawl uses fetcher to recursively crawl
 // pages starting with url, to a maximum of depth.
-func Crawl(id index, url string, depth int, fetcher Fetcher) {
+func (p *ParallelFetcher) Crawl(id index, url string, depth int) {
         // TODO: Fetch URLs in parallel.
         // TODO: Don't fetch the same URL twice.
-        // This implementation doesn't do either:
+        // This implementation now does both:
-        defer wg.Done()
+        defer p.wg.Done()
 
         if depth <= 0 {
                 return
         }
 
         fmt.Printf("Crawl() %q: started for %q\n", id, url)
-        if _, ok := AlreadyFetched[url]; ok {
+        p.mu.Lock()
+        if _, ok := p.AlreadyFetched[url]; ok {
+            p.mu.Unlock()
             fmt.Printf("Crawl() %q: Already fetched %q\n", id, url)
             return
-        } else {
-            mu.Lock()
-            fmt.Printf("Crawl() %q: FETCHING %q\n", id, url)
-            AlreadyFetched[url] = &Empty{}
-            mu.Unlock()
-        }
+        }
+        fmt.Printf("Crawl() %q: FETCHING %q\n", id, url)
+        p.AlreadyFetched[url] = &Empty{}
+        p.mu.Unlock()
-        body, urls, err := fetcher.Fetch(url)
+        body, urls, err := p.fetcher.Fetch(url)
         if err != nil {
                 fmt.Println(err)
                 return
@@ -70,20 +73,24 @@ func Crawl(id index, url string, depth int, fetcher Fetcher) {
                            parentPref: id.parentPref + strconv.Itoa(id.funcNum),
                          }
         for _, u := range urls {
-                wg.Add(1)
-                go Crawl(childId, u, depth-1, fetcher)
+                p.wg.Add(1)
+                go p.Crawl(childId, u, depth-1)
                 childId.funcNum++
         }
         return
 }
 
 func main() {
-        AlreadyFetched = make(map[string]*Empty)
-        wg.Add(1)
+        p := ParallelFetcher{
+            AlreadyFetched: make(map[string]*Empty),
+            fetcher:        goFetcher,
+        }
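+        // Balance the root Crawl's deferred wg.Done(); children are Add()ed just before each goroutine starts.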
+        p.wg.Add(1)
         depth := 4
         id := index{maxDepth: depth}
-        Crawl(id, "https://golang.org/", depth, fetcher)
-        wg.Wait()
+        p.Crawl(id, "https://golang.org/", depth)
+        p.wg.Wait()
 }
 
 // fakeFetcher is Fetcher that returns canned results.
@@ -102,7 +109,7 @@ func (f fakeFetcher) Fetch(url string) (string, []string, error) {
 }
 
 // fetcher is a populated fakeFetcher.
-var fetcher = fakeFetcher{
+var goFetcher = fakeFetcher{
         "https://golang.org/": &fakeResult{
                 "The Go Programming Language",
                 []string{