0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

More than 1 year has passed since last update.

go関連で見た記事 2022/03/30

Last updated at Posted at 2022-03-30

package main

import (
	"bufio"
	"fmt"
	"log"
	"os"
)

// main prints the contents of ./main.go to standard output, one line at a
// time. It terminates the program via log.Fatal if the file cannot be opened
// or if reading fails part-way through.
func main() {
	f, err := os.Open("./main.go")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	s := bufio.NewScanner(f)

	for s.Scan() {
		fmt.Println(s.Text())
	}

	// Scan returns false both at end of input and on failure; Err is the only
	// way to tell the two apart, so check it before declaring success.
	if err := s.Err(); err != nil {
		log.Fatal(err)
	}
}

一行ずつ読み込む

国名スクレイピングサンプルコード

package samples

import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"os"

	"github.com/PuerkitoBio/goquery"
)

// CollectCountryNamesByGoquery collects names from the hard-coded page URL
// via collectCountryNames and writes them out with save.
//
// Any error reported by save terminates the program through log.Fatal, so a
// failed write is no longer silently ignored.
func CollectCountryNamesByGoquery() {
	url := "https://www.mofa.go.jp/mofaj/area/index.html"
	countryNames := collectCountryNames(url)
	if err := save(countryNames); err != nil {
		log.Fatal(err)
	}
}

// collectCountryNames builds a document for url with createtDocument and
// returns the names extracted from it by collectCountryNamesFromDoc.
//
// If the document cannot be created the program is terminated via log.Fatal;
// the function never returns in that case.
func collectCountryNames(url string) []string {
	doc, err := createtDocument(url)
	if err != nil {
		// log.Fatal exits the process, so no return statement is needed here.
		log.Fatal(err)
	}

	return collectCountryNamesFromDoc(doc)
}

// createtDocument returns the document for url, reading it from the local
// cache when hasCache reports one and fetching it from the web otherwise.
// The chosen source is written to the log.
func createtDocument(url string) (*goquery.Document, error) {
	if !hasCache(url) {
		log.Println("From web.")
		return createtDocumentFromWebResource(url)
	}

	log.Println("From cache.")
	return createtDocumentFromCache(url)
}

// collectCountryNamesFromDoc returns the text of every "a" element found
// under each "li.styled2" element of doc, in traversal order. The result is
// an empty, non-nil slice when nothing matches.
func collectCountryNamesFromDoc(doc *goquery.Document) []string {
	names := make([]string, 0)

	// appendLinkText records the text of a single matched link.
	appendLinkText := func(_ int, link *goquery.Selection) {
		names = append(names, link.Text())
	}

	items := doc.Find("li.styled2")
	items.Each(func(_ int, item *goquery.Selection) {
		item.Find("a").Each(appendLinkText)
	})

	return names
}

// save writes lines to the file named by getSavePath, one entry per line.
//
// Failure to create the file terminates the program via log.Fatal. A failure
// while writing, or while closing the file after a successful write, is
// returned to the caller.
func save(lines []string) error {
	f, err := os.Create(getSavePath())
	if err != nil {
		// log.Fatal exits the process, so nothing after it would run.
		log.Fatal(err)
	}

	if err := writeLines(f, lines); err != nil {
		// Best-effort close: the write error is the one worth reporting.
		f.Close()
		return err
	}

	// Close can surface a deferred write failure, so its error is returned
	// rather than discarded.
	return f.Close()
}

// writeLines writes each element of lines to f through writeLine, stopping
// at and returning the first error encountered.
func writeLines(f *os.File, lines []string) error {
	for i := range lines {
		if err := writeLine(f, lines[i]); err != nil {
			return err
		}
	}
	return nil
}

// writeLine writes line followed by a newline character to f and returns
// any error reported by the write.
func writeLine(f *os.File, line string) error {
	if _, err := f.WriteString(line + "\n"); err != nil {
		return err
	}
	return nil
}

// getSavePath returns the fixed, working-directory-relative path of the
// output file.
func getSavePath() string {
	const savePath = "./CountryNames.txt"
	return savePath
}

// createtDocumentFromCache parses the cached copy of url into a document.
// It returns a nil document and the error when the cache cannot be read.
func createtDocumentFromCache(url string) (*goquery.Document, error) {
	cached, readErr := getCacheReader(url)
	if readErr == nil {
		return goquery.NewDocumentFromReader(cached)
	}
	return nil, readErr
}

// createtDocumentFromWebResource requests url with sendRequest and turns the
// response into a document via createDocumentFromResponse.
//
// On any failure it returns a nil document together with the error, so both
// error paths behave the same way and callers never receive an empty
// placeholder document.
//
// NOTE(review): the HTTP status code is not inspected here, so an error page
// would be parsed like a normal one — confirm whether that is acceptable.
func createtDocumentFromWebResource(url string) (*goquery.Document, error) {
	resp, err := sendRequest(url)
	if err != nil {
		return nil, err
	}

	doc, err := createDocumentFromResponse(url, resp)
	if err != nil {
		return nil, err
	}
	return doc, nil
}

// createDocumentFromResponse parses resp into a document and then stores a
// cached copy of it for url via createCacheFromDoc.
//
// A parse failure is returned with a nil document. Caching is best-effort: a
// failure to write the cache is logged but does not fail the call, because
// the parsed document is still usable.
func createDocumentFromResponse(url string, resp *http.Response) (*goquery.Document, error) {
	doc, err := goquery.NewDocumentFromResponse(resp)
	if err != nil {
		return nil, err
	}

	if err := createCacheFromDoc(url, doc); err != nil {
		log.Printf("could not write cache for %s: %v", url, err)
	}
	return doc, nil
}

// createCacheFromDoc serializes doc to HTML and writes it to the cache file
// for url (see getCachePath). It returns any serialization or write error.
func createCacheFromDoc(url string, doc *goquery.Document) error {
	// The cache is a plain data file: owner read/write, others read-only.
	// It does not need the execute or world-write bits that os.ModePerm
	// (0777) would request.
	const cacheFilePerm os.FileMode = 0644

	html, err := doc.Html()
	if err != nil {
		return err
	}
	return ioutil.WriteFile(getCachePath(url), []byte(html), cacheFilePerm)
}

// sendRequest issues an HTTP GET for url using the package's default client
// and returns the response and error unchanged.
func sendRequest(url string) (*http.Response, error) {
	resp, err := http.DefaultClient.Get(url)
	return resp, err
}

// getCacheReader reads the whole cache file for url (see getCachePath) into
// memory and returns a reader over its contents.
//
// If the file cannot be read, it returns a nil reader and the error, so a
// caller cannot accidentally consume an empty reader.
func getCacheReader(url string) (io.Reader, error) {
	b, err := ioutil.ReadFile(getCachePath(url))
	if err != nil {
		return nil, err
	}
	return bytes.NewReader(b), nil
}

func hasCache(url string) bool {
	dest := getCachePath(url)
	return isPathExist(dest)
}

// isPathExist reports false only when os.Stat fails with a "does not exist"
// error for path. Every other outcome, including other Stat errors, is
// reported as true.
func isPathExist(path string) bool {
	_, statErr := os.Stat(path)
	return !os.IsNotExist(statErr)
}

// getCachePath returns the cache file path for url: "./" followed by the
// value of getHashString(url) and the ".html" extension.
func getCachePath(url string) string {
	return "./" + getHashString(url) + ".html"
}

// getHashString returns the SHA-256 digest of s as a lowercase hexadecimal
// string.
func getHashString(s string) string {
	hasher := sha256.New()
	// hash.Hash's Write is documented never to return an error.
	hasher.Write([]byte(s))
	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest)
}
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do by signing up
0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?