More than 1 year has passed since last update.

ソーラーパネル稼働状況取得（AWS CloudShell上でのPython実行）

Last updated at 2024-04-09Posted at 2024-04-08

前回、自前のラズパイで、ソーラーパネルの稼働状況をPythonで取得することができました。

自前ラズパイでの動作

自前ラズパイでの動作は、上手くいっていました。けれど、サーバーレスで実行したいため、色々試行錯誤しています。

GAS上での動作

GASでは、上手くCSRFトークンを取得できず、断念しました。

今回はAWSで行きます。

これなら、サーバーレスで実行できます。
自前のラズパイ上での実行でもいいのですが、技術的好奇心からAWSの扉を叩きます。
また、以前作成したPythonのプログラムも基本的に動きます。
まずは、CloudShell上で、作業環境を構築します。

スクレイピング用

$ pip install requests
$ pip install beautifulsoup4

Googleスプレッドシート用

$ pip install gspread
$ pip install oauth2client

と以前作ったプログラムをコピペで作ります。
当時とは、HTMLの構成が変わったようで、ログインこそできるものの、
スクレイピングが上手くいきません。
以前のプログラムは、動作確認のつもりで、HTML内の数値直前までを検索・削除し、数値を抜き出すものでした。
このため、webページがちょっと改修されただけで動かなくなるものでした。
できるだけ揺らぎに強いものにしようと思います。

下記のように編集しました。

# -*- coding: utf-8 -*-
# 20240406
import requests
from bs4 import BeautifulSoup
import re
import csv
from datetime import datetime, timedelta

# Scrape the solar-panel figures from the monitoring site and append them as
# one row to a local CSV file.
from datetime import timezone  # extra import for the timezone-aware clock below

# Japan Standard Time (UTC+9). An aware "now" replaces the naive
# datetime.utcnow() + 9h arithmetic (utcnow() is deprecated since Python 3.12).
JST = timezone(timedelta(hours=9))

# ref:https://rurukblog.com/post/requests-cookies-session/
URL_LOGIN = "https://ctrl.kp-net.com/settingcontrol/processLogin"
URL_GROUP = 'https://ctrl.kp-net.com/settingcontrol/remotevisualization/simplevisualization/enduser/'

# Placeholder credentials; replace them with the real account before running.
LOGIN_ID = 'xxxxxxxxxxxxx'
LOGIN_PASSWORD = 'yyyyyyyyyyyyyy'

# Seconds to wait for each HTTP request so an unattended run cannot hang forever.
REQUEST_TIMEOUT = 30

# Path of the CSV file the measurements are appended to.
CSV_FILE = 'measurement_data.csv'

# One name per column of the data row (timestamp + 5 tables x 2 values).
# NOTE(review): the original header had a single 'Battery(%)' column although
# the row stores two battery values; the battery names below are neutral
# placeholders -- confirm what the two cells of that table actually mean.
HEADER = ['DateTime', 'Generation_Current(kW)', 'Generation_Total(kW)',
          'Consumption_Current(kW)', 'Consumption_Total(kW)',
          'Battery_Current', 'Battery_Total',
          'Sell_Current(kW)', 'Sell_Total(kW)', 'Buy_Current(kW)', 'Buy_Total(kW)']

# Table caption -> (column of the current value, column of today's total).
COLUMNS_BY_TYPE = {
    '発電': (1, 2),
    '消費': (3, 4),
    '蓄電池': (5, 6),
    '売電': (7, 8),
    '買電': (9, 10),
}

# A plain decimal number such as "12", "1.5" or ".5". Unlike the character
# class [\d.]+ it can never match a lone "." or "1.2.3", which float() rejects.
NUMBER_PATTERN = re.compile(r'\d+(?:\.\d+)?|\.\d+')


def extract_number(text):
    """Return the first decimal number found in *text* as a float.

    Thousands separators (",") are removed before searching. Returns None
    when *text* contains no number.
    """
    match = NUMBER_PATTERN.search(text.replace(',', ''))
    return float(match.group()) if match else None


def fetch_group_page():
    """Log in to the monitoring site and return the HTML of the data page.

    Raises:
        RuntimeError: if the login page carries no ``_csrf`` token.
    """
    with requests.session() as session:
        # 1. Open the login page; the session keeps the cookies it sets.
        req_before_login = session.get(URL_LOGIN, timeout=REQUEST_TIMEOUT)

        # The login form must be posted together with the page's CSRF token.
        bs = BeautifulSoup(req_before_login.text, 'html.parser')
        csrf_tag = bs.find(attrs={'name': '_csrf'})
        csrf_token = csrf_tag.get('content') if csrf_tag is not None else None
        if not csrf_token:
            raise RuntimeError('ログインページからCSRFトークンを取得できませんでした')

        login_data = {
            'loginid': LOGIN_ID,
            'loginpassword': LOGIN_PASSWORD,
            '_csrf': csrf_token,
        }

        # 2. Authenticate.
        req_after_login = session.post(URL_LOGIN, data=login_data,
                                       timeout=REQUEST_TIMEOUT)
        print('--- 認証完了ページへのアクセス結果 ---')
        title = re.search(r'<title.*', req_after_login.text)
        print(title.group(0) if title else '<title>が見つかりませんでした')

        # 3. Move on to the page that shows the measurements.
        req_group = session.get(URL_GROUP, timeout=REQUEST_TIMEOUT)
        return req_group.text


def iter_measurements(numeric_area):
    """Yield ``(data_type, current_text, total_text)`` for every data row.

    *numeric_area* is the parsed element that contains the measurement
    tables. Each table's first ``<th>`` gives the data type and its first
    ``<tr>`` is treated as a heading row. Tables without a ``<th>`` and rows
    without any ``<td>`` are skipped; a row with a single ``<td>`` yields an
    empty total text.
    """
    for table in numeric_area.find_all('table'):
        caption = table.find('th')
        if caption is None:
            continue
        data_type = caption.text.strip()
        for row in table.find_all('tr')[1:]:
            cells = row.find_all('td')
            if not cells:
                continue
            current_text = cells[0].text.strip()
            total_text = cells[1].text.strip() if len(cells) > 1 else ''
            yield data_type, current_text, total_text


def build_data_row(timestamp, measurements):
    """Build one output row from ``(data_type, current_text, total_text)`` items.

    The row starts with *timestamp* followed by ten cells that default to ''.
    A current value without a number keeps its original text; a total
    without a number becomes None. Items whose data type is not listed in
    COLUMNS_BY_TYPE are printed but not stored. When a data type occurs more
    than once, the last occurrence wins.
    """
    data_row = [timestamp] + [''] * (len(HEADER) - 1)
    for data_type, current_text, total_text in measurements:
        current_value = extract_number(current_text)
        if current_value is None:
            # No number in the cell: keep the text itself.
            current_value = current_text
        total_value = extract_number(total_text)
        print(f"{data_type}: 現在値={current_value}, 本日累計={total_value}")

        columns = COLUMNS_BY_TYPE.get(data_type)
        if columns is not None:
            current_col, total_col = columns
            data_row[current_col] = current_value
            data_row[total_col] = total_value
    return data_row


def append_row_to_csv(path, row):
    """Append *row* to the CSV file at *path*, writing HEADER first if it is empty."""
    # utf-8 is set explicitly because a cell may contain Japanese text.
    with open(path, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if f.tell() == 0:  # new or empty file
            writer.writerow(HEADER)
        writer.writerow(row)


def main():
    """Fetch the current measurements and append them to CSV_FILE."""
    jst_time_str = datetime.now(JST).strftime('%Y-%m-%d %H:%M:%S')

    soup = BeautifulSoup(fetch_group_page(), 'html.parser')

    # Element that contains the measurement tables.
    numeric_area = soup.find('div', class_='numeric_area')
    if numeric_area is None:
        print("計測データが見つかりませんでした")
        return

    data_row = build_data_row(jst_time_str, iter_measurements(numeric_area))
    append_row_to_csv(CSV_FILE, data_row)


if __name__ == "__main__":
    main()

とりあえず、リモートで各種値の取得ができることがわかりました。
これで、リモートAWS上のローカルのCSVに出力できます。
これを定期実行・Googleスプレッドシートなどに出力し、記録していきます。

Googleスプレッドシートへ出力

下準備

などを参考に出力先ファイルを作っていましたが、初回起動時のブラウザの起動に失敗してしまいます。
念の為、SSH接続のラズパイでも同じことをしましたが、リモートでの初回起動はできないようです。CUI上でブラウザの操作を行おうとする形になるようでした。

気を取り直して

を参考にスプレッドシートへ出力の設定・pythonプログラムの改修を行います。

# -*- coding: utf-8 -*-
# 20240404
import requests
from bs4 import BeautifulSoup
import re
import csv
from datetime import datetime, timedelta
import gspread
from google.oauth2.service_account import Credentials
# Scrape the solar-panel figures from the monitoring site, append them as one
# row to a local CSV file and insert the same row into a Google spreadsheet.
from datetime import timezone  # extra import for the timezone-aware clock below

# Both scopes are listed on purpose: according to the author's note, a refresh
# token would otherwise have to be reissued every 3600 seconds.
SCOPE = ['https://www.googleapis.com/auth/spreadsheets','https://www.googleapis.com/auth/drive']
# File name of the downloaded service-account JSON key (placeholder).
SERVICE_ACCOUNT_FILE = "bbbbbbbbbb.json"
# ID of the target spreadsheet (placeholder).
SPREADSHEET_KEY = 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'
# Name of the sheet the rows are inserted into.
WORKSHEET_NAME = 'power'
# Row index at which each new row is inserted.
# NOTE(review): presumably row 1 of the sheet holds a header -- confirm.
INSERT_ROW_INDEX = 2

# Japan Standard Time (UTC+9). An aware "now" replaces the naive
# datetime.utcnow() + 9h arithmetic (utcnow() is deprecated since Python 3.12).
JST = timezone(timedelta(hours=9))

# ref:https://rurukblog.com/post/requests-cookies-session/
URL_LOGIN = "https://ctrl.kp-net.com/settingcontrol/processLogin"
URL_GROUP = 'https://ctrl.kp-net.com/settingcontrol/remotevisualization/simplevisualization/enduser/'

# Placeholder credentials; replace them with the real account before running.
LOGIN_ID = 'xxxxxxxxxxxxxxxxxxxxx'
LOGIN_PASSWORD = 'yyyyyyyyyyyyyyyyyyyyyyy'

# Seconds to wait for each HTTP request so an unattended run cannot hang forever.
REQUEST_TIMEOUT = 30

# Path of the CSV file the measurements are appended to.
CSV_FILE = 'measurement_data.csv'

# One name per column of the data row (timestamp + 5 tables x 2 values).
# NOTE(review): the original header had a single 'Battery(%)' column although
# the row stores two battery values; the battery names below are neutral
# placeholders -- confirm what the two cells of that table actually mean.
HEADER = ['DateTime', 'Generation_Current(kW)', 'Generation_Total(kW)',
          'Consumption_Current(kW)', 'Consumption_Total(kW)',
          'Battery_Current', 'Battery_Total',
          'Sell_Current(kW)', 'Sell_Total(kW)', 'Buy_Current(kW)', 'Buy_Total(kW)']

# Table caption -> (column of the current value, column of today's total).
COLUMNS_BY_TYPE = {
    '発電': (1, 2),
    '消費': (3, 4),
    '蓄電池': (5, 6),
    '売電': (7, 8),
    '買電': (9, 10),
}

# A plain decimal number such as "12", "1.5" or ".5". Unlike the character
# class [\d.]+ it can never match a lone "." or "1.2.3", which float() rejects.
NUMBER_PATTERN = re.compile(r'\d+(?:\.\d+)?|\.\d+')


def open_worksheet():
    """Authorise with the service-account key and return the target worksheet.

    Also prints the list of worksheets in the spreadsheet.
    """
    credentials = Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPE)
    gc = gspread.authorize(credentials)
    workbook = gc.open_by_key(SPREADSHEET_KEY)
    print(workbook.worksheets())
    return workbook.worksheet(WORKSHEET_NAME)


def extract_number(text):
    """Return the first decimal number found in *text* as a float.

    Thousands separators (",") are removed before searching. Returns None
    when *text* contains no number.
    """
    match = NUMBER_PATTERN.search(text.replace(',', ''))
    return float(match.group()) if match else None


def fetch_group_page():
    """Log in to the monitoring site and return the HTML of the data page.

    Raises:
        RuntimeError: if the login page carries no ``_csrf`` token.
    """
    with requests.session() as session:
        # 1. Open the login page; the session keeps the cookies it sets.
        req_before_login = session.get(URL_LOGIN, timeout=REQUEST_TIMEOUT)

        # The login form must be posted together with the page's CSRF token.
        bs = BeautifulSoup(req_before_login.text, 'html.parser')
        csrf_tag = bs.find(attrs={'name': '_csrf'})
        csrf_token = csrf_tag.get('content') if csrf_tag is not None else None
        if not csrf_token:
            raise RuntimeError('ログインページからCSRFトークンを取得できませんでした')

        login_data = {
            'loginid': LOGIN_ID,
            'loginpassword': LOGIN_PASSWORD,
            '_csrf': csrf_token,
        }

        # 2. Authenticate.
        req_after_login = session.post(URL_LOGIN, data=login_data,
                                       timeout=REQUEST_TIMEOUT)
        print('--- 認証完了ページへのアクセス結果 ---')
        title = re.search(r'<title.*', req_after_login.text)
        print(title.group(0) if title else '<title>が見つかりませんでした')

        # 3. Move on to the page that shows the measurements.
        req_group = session.get(URL_GROUP, timeout=REQUEST_TIMEOUT)
        return req_group.text


def iter_measurements(numeric_area):
    """Yield ``(data_type, current_text, total_text)`` for every data row.

    *numeric_area* is the parsed element that contains the measurement
    tables. Each table's first ``<th>`` gives the data type and its first
    ``<tr>`` is treated as a heading row. Tables without a ``<th>`` and rows
    without any ``<td>`` are skipped; a row with a single ``<td>`` yields an
    empty total text.
    """
    for table in numeric_area.find_all('table'):
        caption = table.find('th')
        if caption is None:
            continue
        data_type = caption.text.strip()
        for row in table.find_all('tr')[1:]:
            cells = row.find_all('td')
            if not cells:
                continue
            current_text = cells[0].text.strip()
            total_text = cells[1].text.strip() if len(cells) > 1 else ''
            yield data_type, current_text, total_text


def build_data_row(timestamp, measurements):
    """Build one output row from ``(data_type, current_text, total_text)`` items.

    The row starts with *timestamp* followed by ten cells that default to ''.
    A current value without a number keeps its original text; a total
    without a number becomes None. Items whose data type is not listed in
    COLUMNS_BY_TYPE are printed but not stored. When a data type occurs more
    than once, the last occurrence wins.
    """
    data_row = [timestamp] + [''] * (len(HEADER) - 1)
    for data_type, current_text, total_text in measurements:
        current_value = extract_number(current_text)
        if current_value is None:
            # No number in the cell: keep the text itself.
            current_value = current_text
        total_value = extract_number(total_text)
        print(f"{data_type}: 現在値={current_value}, 本日累計={total_value}")

        columns = COLUMNS_BY_TYPE.get(data_type)
        if columns is not None:
            current_col, total_col = columns
            data_row[current_col] = current_value
            data_row[total_col] = total_value
    return data_row


def append_row_to_csv(path, row):
    """Append *row* to the CSV file at *path*, writing HEADER first if it is empty."""
    # utf-8 is set explicitly because a cell may contain Japanese text.
    with open(path, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if f.tell() == 0:  # new or empty file
            writer.writerow(HEADER)
        writer.writerow(row)


def main():
    """Fetch the current measurements and record them in the CSV and the sheet."""
    # Open the sheet first, as the original script did, so an authorisation
    # problem surfaces before any scraping is attempted.
    worksheet = open_worksheet()

    jst_time_str = datetime.now(JST).strftime('%Y-%m-%d %H:%M:%S')

    soup = BeautifulSoup(fetch_group_page(), 'html.parser')

    # Element that contains the measurement tables.
    numeric_area = soup.find('div', class_='numeric_area')
    if numeric_area is None:
        print("計測データが見つかりませんでした。")
        return

    data_row = build_data_row(jst_time_str, iter_measurements(numeric_area))
    append_row_to_csv(CSV_FILE, data_row)
    worksheet.insert_row(data_row, INSERT_ROW_INDEX)


if __name__ == "__main__":
    main()

で、一旦、AWSのCloudShell上で、Pythonを使って、ソーラーパネル状況をスプレッドシートへ出力できるようになりました。

あとは

定期実行にまつわるエトセトラを設定・調整することが必要です。
簡単に調べた感じでは、crontabは実質使用できないなどの制約があります。
AWSのCloud9が有力です。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up