import requests, json, datetime, pytz, logging
import boto3, botocore
from apscheduler.schedulers.blocking import BlockingScheduler
# Apply the standard library's default logging configuration to the root
# logger (presumably so that scheduler log output is visible -- TODO confirm).
logging.basicConfig()
# Module-level scheduler instance; the job defined in this file registers
# itself on it through the ``@sched.scheduled_job`` decorator.
sched = BlockingScheduler()
def _put_json(bucket, key, body):
    """Upload already-encoded JSON bytes *body* to *key* in *bucket*.

    Returns whatever the underlying ``Object.put`` call returns.
    """
    return bucket.Object(key).put(
        Body=body,
        # NOTE(review): Content-Encoding usually names a compression scheme
        # (e.g. gzip), not a charset. Kept unchanged so the metadata of the
        # stored objects does not change -- confirm before altering.
        ContentEncoding='utf-8',
        ContentType='application/json'
    )


@sched.scheduled_job('cron', minute='10', hour='*/1')
def job_crawl():
    """Fetch the latest exchange rates and store them as JSON in S3.

    Registered on ``sched`` with a cron trigger (minute 10, hour ``*/1``).

    Steps:
      1. GET the latest rates from the Open Exchange Rates API.
      2. Drop the ``disclaimer`` and ``license`` fields from the payload.
      3. Upload the JSON to three keys, all under ``exchange/``:
         ``latest.json`` (always overwritten), ``<date>/<date>.json``
         (always overwritten) and ``<date>/<date>-<hour>.json`` (written
         only when the existence probe fails).
         Date and hour come from the payload's ``timestamp``, read as UTC.

    Raises:
        requests.exceptions.RequestException: on network failure, timeout
            or a non-success HTTP status from the rates API.
        KeyError: if the payload has no ``timestamp`` field.
        botocore.exceptions.ClientError: if one of the uploads fails.
    """
    print('[cron.py:job_crawl] Start.')

    ####################################
    # API Keys
    ####################################
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment or the default AWS credential chain.
    OPEN_EXCHANGE_API_URL = 'https://openexchangerates.org/api/latest.json?app_id='
    OPEN_EXCHANGE_APP_ID = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
    AWS_ACCESS_KEY_ID = 'xxxxxxxxxxxxxxxx'
    AWS_SECRET_ACCESS_KEY = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
    AWS_REGION_NAME = 'xx-xxxxx-x'
    AWS_S3_BUCKET_NAME = 'xxxxxxxxxxx'

    ####################################
    # Retrieve json data from openexchangerates.org
    ####################################
    # A timeout keeps a stalled connection from blocking the scheduler forever.
    res = requests.get(OPEN_EXCHANGE_API_URL + OPEN_EXCHANGE_APP_ID, timeout=30)
    # Fail with a clear HTTP error instead of a KeyError further down.
    res.raise_for_status()
    # ``res.text`` is already decoded text; calling ``.decode`` on it fails
    # on Python 3, so let requests parse the body directly.
    json_data = res.json()

    # Strip boilerplate fields; tolerate their absence.
    json_data.pop('disclaimer', None)
    json_data.pop('license', None)

    # Encode once and reuse the same bytes for every upload.
    payload = json.dumps(json_data).encode('utf-8')
    exchange_date = datetime.datetime.fromtimestamp(json_data['timestamp'],
                                                    tz=pytz.utc)

    ####################################
    # Upload json data to S3 bucket
    ####################################
    session = boto3.session.Session(aws_access_key_id=AWS_ACCESS_KEY_ID,
                                    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                                    region_name=AWS_REGION_NAME)
    s3 = session.resource('s3')
    bucket = s3.Bucket(AWS_S3_BUCKET_NAME)

    # Latest snapshot: always overwritten.
    _put_json(bucket, 'exchange/latest.json', payload)

    # Daily snapshot: always overwritten with the most recent data.
    day_prefix = "{0:%Y-%m-%d}".format(exchange_date)
    daily_key = 'exchange/' + day_prefix + '/' + day_prefix + '.json'
    _put_json(bucket, daily_key, payload)

    # Hourly snapshot: written only if the existence probe fails.
    hour_prefix = "{0:%Y-%m-%d-%H}".format(exchange_date)
    hourly_key = 'exchange/' + day_prefix + '/' + hour_prefix + '.json'
    try:
        # If the object can be loaded it already exists: do nothing.
        s3.Object(AWS_S3_BUCKET_NAME, hourly_key).load()
    except botocore.exceptions.ClientError:
        # Any client error from the probe is treated as "object missing",
        # matching the original behaviour.
        # NOTE(review): consider inspecting the error code and uploading
        # only on a not-found response.
        _put_json(bucket, hourly_key, payload)

    print('[cron.py:job_crawl] Done.')
# Start the scheduler so the jobs registered above begin running.
# NOTE(review): with a BlockingScheduler this call is expected to block the
# calling thread, so nothing placed after it would run -- confirm.
sched.start()