Overview
I wanted functions that load BigQuery and MySQL query results into a pandas DataFrame, plus functions that save a DataFrame to BigQuery and as a CSV to GCS. I finally put them all together, so here's a short write-up.
Code
sample.py
import os

import pandas as pd
import pymysql
import dsclient

# initialize the client with a GCP project ID and a credential file
client = dsclient.Client(
    "YOUR_GCP_PROJECT_NAME",
    "/hogehoge/hogehoge/credentials.json"
)

def bq2df(query):
    # read data from BigQuery into a DataFrame
    return client.query(query)

def mysql2df(query):
    # read data into a DataFrame from MySQL (e.g. on Google Cloud SQL)
    connection = pymysql.connect(
        host='XXXXXXX',
        user='YOUR_USERNAME',
        password='YOUR_PASSWORD',
        db='YOUR_DBNAME',
        charset='utf8',
        cursorclass=pymysql.cursors.DictCursor
    )
    try:
        return pd.read_sql(query, connection)
    finally:
        connection.close()

def df2bq(df, dataset, table):
    # store a DataFrame in BigQuery
    tb_name = dataset + "." + table
    client.load(df, tb_name)

def df2gcs(df, folder, file_name, bucket="gs://hogehoge/"):
    # store a DataFrame in Google Cloud Storage as a CSV
    bucket_path = os.path.join(bucket, folder, file_name)
    client.write_csv(df, bucket_path)

if __name__ == "__main__":
    df = pd.DataFrame([i for i in range(100)], columns=["test"])
    folder = "write_test"
    file_name = "test1.csv"
    df2gcs(df, folder, file_name)
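For completeness, here's a minimal sketch of how the other three functions could be called end to end. Every query, dataset name, table name, and folder below is a hypothetical placeholder I made up for illustration, not a value from sample.py.

# Hypothetical usage example: the query, dataset, table, and folder names
# are made-up placeholders -- substitute your own.
query = "SELECT * FROM my_dataset.my_table LIMIT 10"
df = bq2df(query)                                     # BigQuery -> DataFrame
df_sql = mysql2df("SELECT * FROM my_table LIMIT 10")  # MySQL -> DataFrame
df2bq(df_sql, "my_dataset", "my_table_copy")          # DataFrame -> BigQuery
df2gcs(df, "my_folder", "result.csv")                 # DataFrame -> CSV on GCS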
Don't mind the comments being in English.
Replace each hogehoge and the other placeholders with your own values, and you're good to go.
This is super handy.