chg(tour): Group parallel fetcher variables into a struct.
author    sgf <sgf.dma@gmail.com>    Mon, 23 May 2022 15:32:53 +0000 (18:32 +0300)
committer sgf <sgf.dma@gmail.com>    Mon, 23 May 2022 15:32:53 +0000 (18:32 +0300)
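Move the package-level wg, mu and AlreadyFetched variables into a
ParallelFetcher struct together with the Fetcher they drive, and turn
Crawl() into a method on it. The receiver is a pointer, because
sync.WaitGroup and sync.Mutex must not be copied after first use;
bundling the state also lets several independent crawlers coexist.

While here, hold the mutex across the AlreadyFetched lookup as well,
so the check-and-mark is atomic and the map is never read while
another goroutine writes to it. The fake fetcher variable is renamed
to goFetcher, freeing the shorter name for the struct field.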
crawl/crawl
crawl/crawl.go

diff --git a/crawl/crawl b/crawl/crawl
index e6caca3..b8d7de3 100755 (executable)
Binary files a/crawl/crawl and b/crawl/crawl differ
diff --git a/crawl/crawl.go b/crawl/crawl.go
index 7b8512a..79b5ce9 100644 (file)
@@ -18,11 +18,13 @@ type Fetcher interface {
         //IsFetched(url string) bool
 }
 
-var wg sync.WaitGroup
-
-var AlreadyFetched map[string]*Empty
-
-var mu sync.Mutex
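+// ParallelFetcher crawls pages concurrently while remembering which URLs were already fetched.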
+type ParallelFetcher struct {
+    wg             sync.WaitGroup
+    mu             sync.Mutex
+    AlreadyFetched map[string]*Empty
+    fetcher        Fetcher
+}
 
 type index struct {
     maxDepth int
@@ -39,27 +41,28 @@ func (i index) String() string {
 
 // Crawl uses fetcher to recursively crawl
 // pages starting with url, to a maximum of depth.
-func Crawl(id index, url string, depth int, fetcher Fetcher) {
+func (p *ParallelFetcher) Crawl(id index, url string, depth int) {
         // TODO: Fetch URLs in parallel.
         // TODO: Don't fetch the same URL twice.
-        // This implementation doesn't do either:
+        // This implementation now does both:
-        defer wg.Done()
+        defer p.wg.Done()
 
         if depth <= 0 {
                 return
         }
 
         fmt.Printf("Crawl() %q: started for %q\n", id, url)
-        if _, ok := AlreadyFetched[url]; ok {
+        p.mu.Lock()
+        if _, ok := p.AlreadyFetched[url]; ok {
+            p.mu.Unlock()
             fmt.Printf("Crawl() %q: Already fetched %q\n", id, url)
             return
-        } else {
-            mu.Lock()
-            fmt.Printf("Crawl() %q: FETCHING %q\n", id, url)
-            AlreadyFetched[url] = &Empty{}
-            mu.Unlock()
-        }
+        }
+        fmt.Printf("Crawl() %q: FETCHING %q\n", id, url)
+        p.AlreadyFetched[url] = &Empty{}
+        p.mu.Unlock()
-        body, urls, err := fetcher.Fetch(url)
+        body, urls, err := p.fetcher.Fetch(url)
         if err != nil {
                 fmt.Println(err)
                 return
@@ -70,20 +73,24 @@ func Crawl(id index, url string, depth int, fetcher Fetcher) {
                            parentPref: id.parentPref + strconv.Itoa(id.funcNum),
                          }
         for _, u := range urls {
-                wg.Add(1)
-                go Crawl(childId, u, depth-1, fetcher)
+                p.wg.Add(1)
+                go p.Crawl(childId, u, depth-1)
                 childId.funcNum++
         }
         return
 }
 
 func main() {
-        AlreadyFetched = make(map[string]*Empty)
-        wg.Add(1)
+        p := ParallelFetcher{
+            AlreadyFetched: make(map[string]*Empty),
+            fetcher:        goFetcher,
+        }
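+        // Balance the root Crawl's deferred wg.Done(); children are Add()ed just before each goroutine starts.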
+        p.wg.Add(1)
         depth := 4
         id := index{maxDepth: depth}
-        Crawl(id, "https://golang.org/", depth, fetcher)
-        wg.Wait()
+        p.Crawl(id, "https://golang.org/", depth)
+        p.wg.Wait()
 }
 
 // fakeFetcher is Fetcher that returns canned results.
@@ -102,7 +109,7 @@ func (f fakeFetcher) Fetch(url string) (string, []string, error) {
 }
 
 // fetcher is a populated fakeFetcher.
-var fetcher = fakeFetcher{
+var goFetcher = fakeFetcher{
         "https://golang.org/": &fakeResult{
                 "The Go Programming Language",
                 []string{