# get_from_naver.rb
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
require 'digest/sha1'
require 'nokogiri'
require 'parallel'
require 'pathname'
require 'open-uri'
# Returns the highest page number found among the links matched by
# `.MdPagination03 a` on the page at +url+.
#
# url - absolute http(s)/ftp URL of the page to inspect (String).
#
# Link texts are converted with String#to_i, so non-numeric labels count as 0.
# When no link matches (or none carries a positive number) the result is 1,
# so callers can always iterate `1..get_total_page(url)`.
#
# Raises whatever open-uri raises when the page cannot be fetched.
def get_total_page(url)
  # URI#open (instead of Kernel#open) never treats the string as a file name
  # or a "|command", and the block form closes the stream once it is parsed.
  doc = URI.parse(url).open { |io| Nokogiri::HTML(io) }
  page_numbers = doc.css('.MdPagination03 a').map { |link| link.content.to_i }
  [1, *page_numbers].max
end
# Collects the link targets of the anchors matched by
# `.mdMTMWidget01ItemImg01 a` on the page at +url+.
#
# url - absolute http(s)/ftp URL of the list page (String).
#
# Returns an Array of href values in document order; anchors without an href
# attribute are left out. The Array is empty when nothing matches.
#
# Raises whatever open-uri raises when the page cannot be fetched.
def get_individual_pages(url)
  # URI#open (instead of Kernel#open) never treats the string as a file name
  # or a "|command", and the block form closes the stream once it is parsed.
  doc = URI.parse(url).open { |io| Nokogiri::HTML(io) }
  doc.css('.mdMTMWidget01ItemImg01 a').map { |link| link['href'] }.compact
end
# Saves the image shown on one item page into +dir+.
#
# individual_page - URL of an item page (String).
# dir             - Pathname of the directory that receives the file.
#
# The file is named after the SHA-1 of the image URL plus ".jpg", so an image
# URL that already has a file is not fetched again. A page without a matching
# image element is skipped. A failed image download is reported on stderr and
# skipped; a failure to fetch the item page itself still propagates.
def download_image(individual_page, dir)
  doc = URI.parse(individual_page).open { |io| Nokogiri::HTML(io) }
  img = doc.css('.mdEndView01Img01 a img').first
  image = img && img['src']
  return unless image

  filepath = dir + "#{Digest::SHA1.hexdigest(image)}.jpg"
  return if filepath.file?

  puts "Download... #{image}"
  data = begin
    # Only bodies served with an image/* content type are kept; the block form
    # closes the downloaded stream as soon as it has been read.
    URI.parse(image).open { |res| res.read if res.content_type.start_with?('image') }
  rescue StandardError => e
    warn "Skipped #{image}: #{e.message}"
    nil
  end
  return unless data

  # Binary mode keeps the image bytes intact on every platform, and the block
  # form flushes and closes the file right away.
  File.open(filepath, 'wb') { |file| file.write(data) }
end

url = ARGV[0]
abort "Usage: #{$PROGRAM_NAME} URL [OUTPUT_DIR]" unless url

# An empty path can never exist or be created, so the output directory
# defaults to the current directory; an optional second argument overrides it.
output_dir = ARGV[1].to_s
dir = Pathname.new(output_dir.empty? ? '.' : output_dir)
dir.mkpath

# Fall back to a single page if no page count is reported.
total_pages = get_total_page(url) || 1
(1..total_pages).each do |page|
  item_pages = get_individual_pages("#{url}?page=#{page}")
  Parallel.each(item_pages, in_threads: 10) do |individual_page|
    download_image(individual_page, dir)
  end
end