LoginSignup
7
6

More than 5 years have passed since last update.

GASでスクレイピング:SUUMOから賃貸物件情報を取得

Last updated at Posted at 2018-01-09

GASでSUUMOの賃貸物件情報をスクレイピングして取得する

GAS実装

スクレイピング手法

まずはSUUMOのHTMLを読み込んで,スクレイピングのKeyとなるHTML文をピックアップした.
その結果,3つほど見つかった.
それを var tag = "..." で指定し,UrlFetchAppで取得したHTMLを split("\n") で行ごとに分割して,該当行を検索.
そこから「n行後(または前)」といった形で相対位置を指定して,情報を取得する.

ソースコード

/**
 * Manual entry point: runs main() against one fixed SUUMO listing page.
 * The object returned by main() is not used here.
 */
function doCurl(){
  // SUUMO listing URL
  var listingUrl = "https://suumo.jp/chintai/bc_100106732165/?suit=STfr20160902000";
  main(listingUrl);
}

//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//  main: 
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/**
 * Fetches a SUUMO rental listing page and extracts its main attributes.
 *
 * The HTML is split into lines, three marker strings are searched for, and
 * every field is read from a fixed line offset relative to the line that
 * contains its marker. The offsets depend on the page markup, so a layout
 * change on the site breaks the extraction.
 *
 * NOTE(review): cut_string is not defined in this snippet. It is presumably a
 * helper that returns the text between a start and an end marker (with "" for
 * "no marker") - confirm against its definition. Several calls pass "" for
 * both markers; verify these are intentional.
 *
 * @param {string} url URL of the listing page to fetch.
 * @return {Object} The extracted fields plus the source url. Fields whose
 *     marker line was not found in the page are null.
 * @throws {Error} If a marker is found but a line expected at a fixed offset
 *     from it does not exist in the page.
 */
function main(url){
  var html = UrlFetchApp.fetch(url).getContentText();
  var lines = html.split("\n");

  // Every field is a local initialised to null. They used to be implicit
  // globals, which made the result object throw a ReferenceError when a marker
  // was missing and let values from a previous call leak into this one.
  var yachin = null;
  var kanrihi = null;
  var shikikin = null;
  var reikin = null;
  var madori = null;
  var hirosa = null;
  var muki = null;
  var chikunen = null;
  var moyori = null;
  var moyori_toho = null;
  var madori_detail = null;
  var kozo = null;
  var kaisu = null;
  var kenchikubi = null;

  // Returns the trimmed line located `offset` lines away from marker line `base`.
  function lineAt(base, offset){
    var line = lines[base + offset];
    if(line === undefined){
      throw new Error(
        "SUUMO page layout not as expected: no line at offset " + offset +
        " from marker line " + (base + 1) + " (page has " + lines.length + " lines)"
      );
    }
    return line.trim();
  }

  var i;

  // Block 1: price / layout summary, anchored on the "add to favourites" link.
  var summaryTag = '<span class="jj-fr_detail-icon jj-fr_detail-icon--note"></span><span>お気に入りに登録する</span></a>';
  for(i = 0; i < lines.length; i++){
    if(lines[i].indexOf(summaryTag) === -1){
      continue;
    }
    Logger.log("================================");

    // Rent: the text before "万円" is multiplied by 10000.
    yachin = cut_string(lineAt(i, 17), "<span>", "</span>");
    yachin = Number(cut_string(yachin, "", "万円")) * 10000;
    Logger.log("家賃: " + yachin);

    // Management fee
    kanrihi = cut_string(lineAt(i, 21), "<span>", "</span>");
    kanrihi = Number(cut_string(kanrihi, "", ""));
    Logger.log("管理費: " + kanrihi);

    // Security deposit
    shikikin = cut_string(lineAt(i, 29), "<span>", "</span>");
    shikikin = Number(cut_string(shikikin, "", "万円")) * 10000;
    Logger.log("敷金: " + shikikin);

    // Key money
    reikin = cut_string(lineAt(i, 33), "<span>", "</span>");
    reikin = Number(cut_string(reikin, "", "万円")) * 10000;
    Logger.log("礼金: " + reikin);

    // Floor plan
    madori = cut_string(lineAt(i, 44), "<div>", "</div>");
    Logger.log("間取り: " + madori);

    // Floor area: text before "m<sup>", rounded to the nearest integer.
    hirosa = cut_string(lineAt(i, 46), "", "m<sup>");
    hirosa = Math.round(Number(hirosa));
    Logger.log("広さ: " + hirosa);

    // Facing direction
    muki = cut_string(lineAt(i, 48), "<div>", "</div>");
    Logger.log("向き: " + muki);

    // Building age: the literal "新築" (newly built) is recorded as 1.
    chikunen = cut_string(lineAt(i, 56), "<div>", "</div>");
    if(chikunen === "新築"){
      chikunen = 1;
    }else{
      chikunen = cut_string(chikunen, "", "");
    }
    Logger.log("築年数: " + chikunen);
  }

  // Block 2: nearest station, read from the line just above each transit
  // pop-up link. When several links exist, the last one wins.
  var stationTag = '[<a href="javascript:norikaePop(';
  for(i = 0; i < lines.length; i++){
    if(lines[i].indexOf(stationTag) === -1){
      continue;
    }
    // Nearest station
    moyori = cut_string(lineAt(i, -1), "/", "");
    Logger.log("最寄り駅: " + moyori);

    // Walking time to the station (read from the same line)
    moyori_toho = cut_string(lineAt(i, -1), "", "");
    Logger.log("駅徒歩: " + moyori_toho);
  }

  // Block 3: property overview table, anchored on its heading.
  var overviewTag = '<h2><span>物件概要</span></h2>';
  for(i = 0; i < lines.length; i++){
    if(lines[i].indexOf(overviewTag) === -1){
      continue;
    }
    // Floor plan details
    madori_detail = cut_string(lineAt(i, 11), "<td>", "</td>");
    Logger.log("間取り詳細: " + madori_detail);

    // Building structure
    kozo = cut_string(lineAt(i, 14), "<td>", "</td>");
    Logger.log("構造: " + kozo);

    // Floor
    kaisu = cut_string(lineAt(i, 18), "<td>", "</td>");
    Logger.log("階: " + kaisu);

    // Construction date
    kenchikubi = cut_string(lineAt(i, 20), "<td>", "</td>");
    Logger.log("建築日: " + kenchikubi);
  }

  var res = {
    "yachin": yachin,
    "kanrihi": kanrihi,
    "shikikin": shikikin,
    "reikin": reikin,
    "madori": madori,
    "hirosa": hirosa,
    "muki": muki,
    "chikunen": chikunen,
    "madori_detail": madori_detail,
    "kaisu": kaisu,
    "kenchikubi": kenchikubi,
    "moyori": moyori,
    "moyori_toho": moyori_toho,
    "kozo": kozo,
    "url": url,
  };
  Logger.log("================================");
  Logger.log(res);
  return res;
}

実行結果

ログ出力はこんな感じです

スクリーンショット 2018-01-09 21.12.00.png

7
6
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
7
6