Help us understand the problem. What is going on with this article?

puppeteerでの要素の取得方法

More than 1 year has passed since last update.

概要

puppeteerでの要素の取得のための関数は

page.evaluate + (querySelector|querySelectorAll)
page.$
page.$$
page.$eval
page.$$eval

とありますが、実際にそれらを使うためにどう書くかをまとめています。速度等は検証できていませんが、いずれのケースでも page.$eval / page.$$eval を使うとコードがシンプルになるので良いかと思います。

APIの動作の詳細はpuppeteerのAPIを参照ください。
https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md

前提

以下のようなliの中にアンカータグが入っているページを取得することを想定しています。

<ul>
  <li><a href="some link">some HTML</a></li>
  <li><a href="some link">some HTML</a></li>
  <li><a href="some link">some HTML</a></li>
  ...
</ul>

以下のようにページ読み込みを準備します。

  // Launch the browser with the sandbox-disabling flags and open a new tab.
  const launchOptions = {
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  };
  const browser = await puppeteer.launch(launchOptions);
  const page = await browser.newPage();

  // Navigate to the page to inspect (placeholder URL).
  await page.goto('url you want to check');

  // itemSelector targets the anchor in the first <li> only;
  // listSelector targets the anchor in every <li>.
  const itemSelector = 'some selecter > ul > li:nth-child(1) > a';
  const listSelector = 'some selecter > ul > li > a';

一つの要素から一つのプロパティを取得する

page.evaluate + querySelector

  // Look the element up inside the page and return its text content.
  const data = await page.evaluate(
    (selector) => document.querySelector(selector).textContent,
    itemSelector,
  );

page.$

  // Get a handle to the element, then turn its textContent property
  // handle into a plain value.
  const item = await page.$(itemSelector);
  const textHandle = await item.getProperty('textContent');
  const data = await textHandle.jsonValue();

page.$eval

  // $eval passes the matched element to the callback; return its text content.
  const data = await page.$eval(itemSelector, (item) => item.textContent);

一つの要素から複数のプロパティを取得する

page.evaluate + querySelector

  // Look the element up once inside the page and return the three
  // properties of interest as a plain object.
  const data = await page.evaluate((selector) => {
    const anchor = document.querySelector(selector);
    const { href, textContent, innerHTML } = anchor;
    return { href, textContent, innerHTML };
  }, itemSelector);

page.$

  // Get a handle to the element, then read each property one after another
  // (href, textContent, innerHTML) into a plain object.
  const item = await page.$(itemSelector);
  const data = {};
  for (const name of ['href', 'textContent', 'innerHTML']) {
    const propertyHandle = await item.getProperty(name);
    data[name] = await propertyHandle.jsonValue();
  }

page.$eval

  // $eval hands the matched element to the callback; pick the three
  // properties off it and return them as a plain object.
  const data = await page.$eval(itemSelector, ({ href, textContent, innerHTML }) => ({
    href,
    textContent,
    innerHTML,
  }));

複数の要素から一つのプロパティを取得する

page.evaluate + querySelectorAll

  // Collect the text content of every matching element inside the page.
  const datas = await page.evaluate(
    (selector) => Array.from(document.querySelectorAll(selector), (node) => node.textContent),
    listSelector,
  );

page.$$

  // Get handles to all matching elements, then read textContent from each
  // handle in order.
  const list = await page.$$(listSelector);
  const datas = [];
  for (const handle of list) {
    const textHandle = await handle.getProperty('textContent');
    datas.push(await textHandle.jsonValue());
  }

page.$$eval

  // $$eval passes the array of matched elements to the callback;
  // map each one to its text content.
  const datas = await page.$$eval(listSelector, (list) => list.map((node) => node.textContent));

複数の要素から複数のプロパティを取得する

page.evaluate + querySelectorAll

  // Inside the page, turn every matching element into a plain object
  // holding its href, textContent and innerHTML.
  const datas = await page.evaluate((selector) => {
    const anchors = document.querySelectorAll(selector);
    return Array.from(anchors, ({ href, textContent, innerHTML }) => ({
      href,
      textContent,
      innerHTML,
    }));
  }, listSelector);

page.$$

  // Get handles to all matching elements, then for each handle read
  // href, textContent and innerHTML (in that order) into a plain object.
  const list = await page.$$(listSelector);
  const datas = [];
  for (const handle of list) {
    const data = {};
    for (const name of ['href', 'textContent', 'innerHTML']) {
      const propertyHandle = await handle.getProperty(name);
      data[name] = await propertyHandle.jsonValue();
    }
    datas.push(data);
  }

page.$$eval

  // $$eval passes the array of matched elements to the callback; map each
  // element to a plain object with its href, textContent and innerHTML.
  const datas = await page.$$eval(listSelector, (list) =>
    list.map(({ href, textContent, innerHTML }) => ({ href, textContent, innerHTML })),
  );

確認用コード

const fs = require('fs');
const puppeteer = require('puppeteer');

/**
 * Verification script: runs every retrieval variant against one page and
 * prints each result, preceded by a label naming the variant.
 *
 * Variants covered, in order:
 *   1. one item,   one property   (evaluate / $ / $eval)
 *   2. one item,   some properties (evaluate / $ / $eval)
 *   3. some items, one property   (evaluate / $$ / $$eval)
 *   4. some items, some properties (evaluate / $$ / $$eval)
 *
 * The browser is always closed, even when a step throws, and any failure
 * is reported on stderr with a non-zero exit code.
 */
(async () => {
  // Print a label line followed by the value it describes.
  const show = (label, value) => {
    console.log(label);
    console.log(value);
  };

  // Read one property from an element handle as a plain value.
  const readProperty = async (handle, name) => {
    const propertyHandle = await handle.getProperty(name);
    return propertyHandle.jsonValue();
  };

  // Read href, textContent and innerHTML (in that order) from an element handle.
  const readProperties = async (handle) => ({
    href: await readProperty(handle, 'href'),
    textContent: await readProperty(handle, 'textContent'),
    innerHTML: await readProperty(handle, 'innerHTML'),
  });

  const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'] });

  try {
    const page = await browser.newPage();
    await page.goto('url you want to check');

    const itemSelector = 'some selecter > ul > li:nth-child(1) > a';
    const listSelector = 'some selecter > ul > li > a';

    // NOTE: callbacks passed to evaluate/$eval/$$eval are kept self-contained
    // (no references to variables of this script) — per the Puppeteer docs they
    // run in the page context; values are passed in as arguments instead.

    ////////////////////////////////////////////////////////
    // one item, one property

    const oneTextByEvaluate = await page.evaluate(
      (selector) => document.querySelector(selector).textContent,
      itemSelector,
    );
    show('one item one attribute', oneTextByEvaluate);

    const itemHandle = await page.$(itemSelector);
    const oneTextByHandle = await readProperty(itemHandle, 'textContent');
    show('one item one attribute using $', oneTextByHandle);

    const oneTextByEval = await page.$eval(itemSelector, (item) => item.textContent);
    // Label fixed: this is a single-item result (was "some items ...").
    show('one item one attribute using $eval', oneTextByEval);

    ////////////////////////////////////////////////////////
    // one item, some properties

    const onePropsByEvaluate = await page.evaluate((selector) => {
      const anchor = document.querySelector(selector);
      return {
        href: anchor.href,
        textContent: anchor.textContent,
        innerHTML: anchor.innerHTML,
      };
    }, itemSelector);
    show('one item some attributes', onePropsByEvaluate);

    const itemHandleForProps = await page.$(itemSelector);
    const onePropsByHandle = await readProperties(itemHandleForProps);
    show('one item some attributes using $', onePropsByHandle);

    const onePropsByEval = await page.$eval(itemSelector, (item) => ({
      href: item.href,
      textContent: item.textContent,
      innerHTML: item.innerHTML,
    }));
    // Label fixed: this is a single-item result (was "some items ...").
    show('one item some attributes using $eval', onePropsByEval);

    ////////////////////////////////////////////////////////
    // some items, one property

    const textsByEvaluate = await page.evaluate(
      (selector) => Array.from(document.querySelectorAll(selector), (node) => node.textContent),
      listSelector,
    );
    show('some items one attribute', textsByEvaluate);

    const handlesForText = await page.$$(listSelector);
    const textsByHandles = [];
    for (const handle of handlesForText) {
      textsByHandles.push(await readProperty(handle, 'textContent'));
    }
    show('some items one attribute using $$', textsByHandles);

    const textsByEval = await page.$$eval(listSelector, (list) =>
      list.map((node) => node.textContent),
    );
    show('some items one attribute using $$eval', textsByEval);

    ////////////////////////////////////////////////////////
    // some items, some properties

    const propsByEvaluate = await page.evaluate(
      (selector) =>
        Array.from(document.querySelectorAll(selector), (node) => ({
          href: node.href,
          textContent: node.textContent,
          innerHTML: node.innerHTML,
        })),
      listSelector,
    );
    show('some items some attributes', propsByEvaluate);

    const handlesForProps = await page.$$(listSelector);
    const propsByHandles = [];
    for (const handle of handlesForProps) {
      propsByHandles.push(await readProperties(handle));
    }
    // Label fixed: several properties are read here (was "... one attribute ...").
    show('some items some attributes using $$', propsByHandles);

    const propsByEval = await page.$$eval(listSelector, (list) =>
      list.map((node) => ({
        href: node.href,
        textContent: node.textContent,
        innerHTML: node.innerHTML,
      })),
    );
    show('some items some attributes using $$eval', propsByEval);

    ////////////////////////////////////////////////////////
  } finally {
    // Always release the browser process, including on failure.
    await browser.close();
  }
})().catch((err) => {
  // Surface the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
andfactory
Smartphone Idea Companyとして、人々の生活に「&(アンド)」を届ける。
https://andfactory.co.jp/
Why not register and get more from Qiita?
  1. We will deliver articles that match you
    By following users and tags, you can catch up information on technical fields that you are interested in as a whole
  2. you can read useful information later efficiently
    By "stocking" the articles you like, you can search right away