導入
Html Agility Pack を NuGet からインストールする。
実装方法
// Script entry point: runs the scraping sample synchronously to completion.
void Main()
{
    // GetAwaiter().GetResult() blocks like Wait(), but rethrows the original
    // exception instead of wrapping it in an AggregateException, so a failure
    // surfaces with its real type and message.
    WebScrapingRunner.RunAsync().GetAwaiter().GetResult();
}
/// <summary>
/// Sample runner that downloads a fixed web page, writes its HTML to the
/// console and then reports how long the whole operation took.
/// </summary>
public static class WebScrapingRunner
{
    /// <summary>
    /// Fetches the page at the hard-coded URL through <c>GetHtmlAsync</c>,
    /// prints the document node's output to the console, and finally prints
    /// the elapsed time in seconds.
    /// </summary>
    /// <returns>A task that completes once both console writes are done.</returns>
    public static async Task RunAsync()
    {
        // Stopwatch used to measure elapsed time (covers the download and the console dump).
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        using (var http = new HttpClient())
        {
            // 30-second request timeout.
            http.Timeout = TimeSpan.FromSeconds(30);

            var eventListUri = new Uri("https://www.gotokyo.org/eventlist/ja/list");
            var document = await http.GetHtmlAsync(eventListUri);
            var markup = document.DocumentNode.WriteTo();
            Console.WriteLine(markup);
        }

        var elapsedSeconds = stopwatch.Elapsed.TotalSeconds;
        Console.WriteLine("HTML取得完了: {0:0.000}秒", elapsedSeconds);
    }
}
/// <summary>
/// Extension methods that load HTTP content into an <c>HtmlDocument</c>.
/// </summary>
public static class HttpClientExtensions
{
    /// <summary>
    /// Sends a GET request to <paramref name="requestUri"/> and parses the
    /// response body as HTML.
    /// </summary>
    /// <param name="httpClient">Client used to send the request.</param>
    /// <param name="requestUri">Address of the page to download.</param>
    /// <returns>The parsed HTML document.</returns>
    /// <remarks>
    /// The response status code is not checked, so the body of a non-success
    /// response is parsed as well.
    /// </remarks>
    public static async Task<HtmlDocument> GetHtmlAsync(this HttpClient httpClient, Uri requestUri)
    {
        // Dispose the response once its body has been read and parsed so the
        // underlying content stream is released deterministically.
        using (var response = await httpClient.GetAsync(requestUri))
        {
            return await response.Content.ReadAsHtmlAsync();
        }
    }

    /// <summary>
    /// Reads the content as a string and loads it into a new <c>HtmlDocument</c>.
    /// </summary>
    /// <param name="httpContent">HTTP content whose body holds the HTML text.</param>
    /// <returns>A document loaded from the content's string representation.</returns>
    public static async Task<HtmlDocument> ReadAsHtmlAsync(this HttpContent httpContent)
    {
        var html = await httpContent.ReadAsStringAsync();
        var doc = new HtmlDocument();
        doc.LoadHtml(html);
        return doc;
    }
}