4
4

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 5 years have passed since last update.

go-tourの#71を解いてみる

Last updated at Posted at 2014-08-28

問題

こちら
元々用意されているwebクローラを改良するのが課題

  • 並行処理でクロールする
  • クロール済みのページはクロールしない

回答

こんなんでいいのかな…もっといい感じに書ける気がする。
ポイントは

  • fetch済かどうかをChannel(holder)で管理している
  • 子goroutineが終わるまで親のgoroutineが待つためのChannel(done)を用意する
package main

import (
	"fmt"
	"time"
)

// Fetcher is the page-retrieval dependency that Crawl uses to load a URL.
type Fetcher interface {
	// Fetch returns the body of URL and
	// a slice of URLs found on that page.
	// Crawl treats a non-nil err as a failed fetch: it prints the error
	// and does not follow any links for that URL.
	Fetch(url string) (body string, urls []string, err error)
}

// Crawl walks the pages reachable from url through fetcher, descending at
// most depth levels, and prints its progress to standard output.
//
// Each URL is claimed at most once: the shared history map is received from
// the holder channel, consulted and updated, then sent back, so that, with a
// single map circulating through holder, only one goroutine touches it at a
// time. Links found on a page are crawled concurrently in separate
// goroutines, and Crawl returns only after every one of them has finished.
func Crawl(url string, depth int, fetcher Fetcher) {
	fmt.Printf("start. depth=%d, url=%s\n", depth, url)
	if depth <= 0 {
		return
	}

	// Receiving the map from holder acts as taking a lock; sending it back
	// releases it.
	visited := <-holder
	_, seen := visited[url]
	if seen {
		fmt.Printf("already fetched: %s \n", url)
	} else {
		// Claim the URL before fetching so concurrent crawlers skip it.
		visited[url] = url
	}
	holder <- visited
	if seen {
		return
	}

	body, links, err := fetcher.Fetch(url)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("found: %s %q\n", url, body)

	// One goroutine per link; each signals on finished once its whole
	// subtree has been crawled.
	finished := make(chan bool)
	for i := range links {
		go func(child string) {
			Crawl(child, depth-1, fetcher)
			finished <- true
		}(links[i])
	}

	// Collect exactly one completion signal per spawned goroutine.
	for pending := len(links); pending > 0; pending-- {
		<-finished
	}

	fmt.Printf("end. depth=%d, url=%s\n", depth, url)
}

// holder carries the shared fetch history. Crawl receives the map from it
// before reading or writing the history and sends the map back afterwards.
var holder chan fetchHistory

// init seeds holder with a single empty history map. The channel has
// capacity one, so the map can be sent back without blocking once a
// goroutine has taken it out.
func init() {
	ch := make(chan fetchHistory, 1)
	ch <- fetchHistory{}
	holder = ch
}

// main crawls the canned site from its root page down to four levels and
// prints a closing message once the whole crawl has returned.
func main() {
	const (
		startURL = "http://golang.org/"
		maxDepth = 4
	)
	Crawl(startURL, maxDepth, fetcher)
	fmt.Println("finish!!")
}

// historyHolder is declared but not referenced anywhere else in this file.
// NOTE(review): looks like a leftover from an earlier name for holder —
// confirm before removing.
var historyHolder chan fetchHistory

// fetchHistory records the URLs that Crawl has already claimed for fetching.
// Only key presence is checked; Crawl stores the URL itself as the value.
type fetchHistory map[string]string

// fakeFetcher is a Fetcher that returns canned results, keyed by URL.
type fakeFetcher map[string]*fakeResult

// fakeResult is the canned response a fakeFetcher holds for one URL.
type fakeResult struct {
	// body is the page text Fetch returns for the URL.
	body string
	// urls lists the links Fetch reports for the page.
	urls []string
}

// Fetch looks url up in the canned results. A known URL yields its stored
// body and links with a nil error. An unknown URL pauses for one second,
// imitating a request that times out, and then yields a "not found" error.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	res, found := f[url]
	if !found {
		// Simulated timeout for pages that do not exist.
		time.Sleep(time.Second)
		return "", nil, fmt.Errorf("not found: %s", url)
	}
	return res.body, res.urls, nil
}

// fetcher is the canned site handed to Crawl: four golang.org pages, each
// mapped to its body text and the links found on it. Some linked URLs
// (for example http://golang.org/cmd/) have no entry of their own, so
// Fetch reports them as not found.
var fetcher = fakeFetcher{
	"http://golang.org/": {
		body: "The Go Programming Language",
		urls: []string{
			"http://golang.org/pkg/",
			"http://golang.org/cmd/",
		},
	},
	"http://golang.org/pkg/": {
		body: "Packages",
		urls: []string{
			"http://golang.org/",
			"http://golang.org/cmd/",
			"http://golang.org/pkg/fmt/",
			"http://golang.org/pkg/os/",
		},
	},
	"http://golang.org/pkg/fmt/": {
		body: "Package fmt",
		urls: []string{
			"http://golang.org/",
			"http://golang.org/pkg/",
		},
	},
	"http://golang.org/pkg/os/": {
		body: "Package os",
		urls: []string{
			"http://golang.org/",
			"http://golang.org/pkg/",
		},
	},
}
4
4
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
4
4

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?