Haskell
curl
shpider

Network.Shpider を使って EUC-JP なサイトをダウンロードする

More than 5 years have passed since the last update.


shpidertest.hs

import Network.Shpider

import Network.Curl
import Codec.Text.IConv
import qualified Data.ByteString.Lazy.Char8 as B
import qualified Data.ByteString.Lazy.UTF8 as U
import Foreign.C.String
import Data.IORef

-- | Download an EUC-JP encoded page with shpider and print it as UTF-8.
--
-- A custom curl write callback collects the raw response bytes into an
-- 'IORef'. Once the transfer finishes, the bytes are converted from EUC-JP
-- to UTF-8 with iconv and written to stdout.
main :: IO ()
main = runShpider $ do
  -- Received chunks, newest first. Prepending keeps each callback cheap;
  -- appending with (++) would re-copy everything received so far per chunk.
  chunksRef <- lift $ newIORef []
  addCurlOpts
    [ CurlWriteFunction $ gatherOutput_ $ \buf -> do
        -- The CAString variant is deliberate: it reads the buffer as plain
        -- 8-bit chars without locale decoding, so B.pack below can rebuild
        -- the original byte sequence.
        chunk <- peekCAStringLen buf
        -- Strict modify so the ref does not pile up unevaluated thunks.
        modifyIORef' chunksRef (chunk :)
    ]
  -- The value returned by download is not used; the body is read from the
  -- IORef filled by the write callback instead.
  _ <- download "http://www.sampou.org/haskell/a-a-monads/html/hardway.html"
  chunks <- lift $ readIORef chunksRef
  let rawBytes = B.pack (concat (reverse chunks))
      utf8Text = U.toString (convert "EUC-JP" "UTF-8" rawBytes)
  lift $ putStrLn utf8Text