OpenSearch上のレコードを全件取得する作業があったので、手順をメモしておきます。
Search APIについて
OpenSearchのSearch APIは、デフォルトでは最大1万件（`index.max_result_window` の既定値）までしかレコードを取得できません。
https://opensearch.org/docs/1.2/opensearch/rest-api/search/
1万件を超えるレコードを取得したい場合は、Scroll APIを使うとよいです。
https://opensearch.org/docs/1.2/opensearch/rest-api/scroll/
全件取得
# Dump every document of ${INDEX_NAME} to ${LOG_FILE} through the Scroll API.
# Each page's `.hits.hits` array is appended to ${LOG_FILE} as JSON.
# Expects OPENSEARCH_USER, OPENSEARCH_PASS, OPENSEARCH_HOST, INDEX_NAME and
# LOG_FILE to be set by the caller; requires curl and jq.
# NOTE: the credentials are passed on curl's command line and are therefore
# visible in the process list while a request is running.

# First, get 10000 records
# scroll=5m -> how long the search context (snapshot) is kept alive
# The URL is quoted so that the `?` is never treated as a glob character.
res=$(curl -X GET -sS -u "${OPENSEARCH_USER}:${OPENSEARCH_PASS}" \
  "${OPENSEARCH_HOST}/${INDEX_NAME}/_search?scroll=5m" \
  -H 'Content-Type: application/json' \
  -d '{ "size": 10000 }')
scroll_id=""
page=0
while true; do
  # Count the hits of the current page. `arrays` emits nothing unless
  # .hits.hits is an array, and -e turns "no output" (curl failure, error
  # response, invalid JSON) into a non-zero status, so a bad response stops the
  # loop instead of spinning forever on an empty counter.
  if ! cnt=$(printf '%s' "$res" | jq -er '.hits.hits | arrays | length' 2> /dev/null); then
    printf 'Unexpected response from OpenSearch, aborting: %s\n' "$res" >&2
    break
  fi
  # An empty page after the first one means every record has been fetched.
  # (The first page is always written, even when the index is empty.)
  if [ "$page" -gt 0 ] && [ "$cnt" -eq 0 ]; then
    break
  fi
  page=$((page + 1))
  # Output logs to file
  # printf + quoting keeps the JSON intact (no word splitting or globbing).
  printf '%s' "$res" | jq -r '.hits.hits' >> "${LOG_FILE}"
  # Get scroll_id for paging
  scroll_id=$(printf '%s' "$res" | jq -r '._scroll_id')
  # Get logs after 10000 by using scroll_id
  # The request body is built by jq so the id is always valid, escaped JSON.
  res=$(curl -X GET -sS -u "${OPENSEARCH_USER}:${OPENSEARCH_PASS}" \
    "${OPENSEARCH_HOST}/_search/scroll" \
    -H 'Content-Type: application/json' \
    -d "$(jq -nc --arg id "$scroll_id" '{scroll: "5m", scroll_id: $id}')")
done
# Release the search context right away instead of letting it occupy cluster
# memory until the 5m timeout. Best effort: a failure here is not fatal because
# the context expires on its own.
if [ -n "$scroll_id" ] && [ "$scroll_id" != "null" ]; then
  curl -X DELETE -s -u "${OPENSEARCH_USER}:${OPENSEARCH_PASS}" \
    "${OPENSEARCH_HOST}/_search/scroll/${scroll_id}" > /dev/null
fi