この記事では、Pythonを使用してSlackのダイレクトメッセージやスレッドのやりとりをExcelにエクスポートする方法を説明します。(エクスポートしたデータには個人情報が含まれるため、取り扱いには十分ご注意ください。)
参考GitHubリポジトリ
https://github.com/thama125/slack-export
必要なライブラリのインポート
import json
import logging
from pathlib import Path
from typing import Any, Literal
import pandas as pd
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
ロガーの設定
# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger used for the progress messages in this script.
logger = logging.getLogger(__name__)
Slack APIクライアントの実装
class Client:
    """Minimal Slack Web API client built on a retrying ``requests`` session."""

    def __init__(self, token) -> None:
        """Store the token and prepare the HTTP session.

        Args:
            token: Slack API token; sent as a Bearer credential in the
                ``Authorization`` header of every request.
        """
        self._token = token
        self._headers = {"Authorization": f"Bearer {token}"}

        # Requests to slack.com are retried up to 5 times with a backoff
        # factor of 3 between attempts.
        retry_policy = Retry(total=5, backoff_factor=3)
        session = requests.Session()
        session.mount("https://slack.com/", HTTPAdapter(max_retries=retry_policy))
        self._session = session
APIコールを行うヘルパーメソッド
def _call(self, url, params=None) -> dict[str, Any]:
    """Send a GET request to a Slack Web API endpoint and return its JSON body.

    Args:
        url: Full URL of the API method to call.
        params: Optional query parameters; ``None`` is treated as no parameters.

    Returns:
        The decoded JSON response.

    Raises:
        requests.HTTPError: If the HTTP status indicates a failure.
        RuntimeError: If Slack answers with ``"ok": false`` in the body.
    """
    response = self._session.get(
        url,
        headers=self._headers,
        params=params or {},
        timeout=3,
    )
    response.raise_for_status()
    payload = response.json()
    # Slack signals application-level failures (bad token, missing scope, ...)
    # inside a successful HTTP response, so raise_for_status() alone misses them.
    if not payload.get("ok", True):
        error = payload.get("error", "unknown_error")
        raise RuntimeError(f"Slack API call failed: {url}: {error}")
    return payload
ユーザー情報の取得
def fetch_users(self) -> list[dict[str, Any]]:
    """Return every member reported by ``users.list``, following pagination.

    Returns:
        The concatenated ``members`` entries of all result pages.
    """
    members: list[dict[str, Any]] = []
    cursor = None
    while True:
        params = {"limit": 200}
        if cursor:
            params["cursor"] = cursor
        response = self._call("https://slack.com/api/users.list", params=params)
        members.extend(response["members"])
        # An absent or empty next_cursor means there are no further pages.
        cursor = (response.get("response_metadata") or {}).get("next_cursor")
        if not cursor:
            break
    return members
ダイレクトメッセージチャンネルの取得
def fetch_channels(self, channel_id: str = "ここにチャンネルID") -> list[dict[str, Any]]:
    """Return the direct-message conversation(s) whose ID equals ``channel_id``.

    All pages of ``conversations.list`` are scanned, so the target
    conversation is found even when it is not on the first page.

    Args:
        channel_id: ID of the conversation to export. The default is the
            article's placeholder and must be replaced with a real ID
            (either here or by passing the argument).

    Returns:
        The matching channel objects (empty if nothing matched).
    """
    matched: list[dict[str, Any]] = []
    cursor = None
    while True:
        params = {"types": "im", "exclude_archived": True, "limit": 200}
        if cursor:
            params["cursor"] = cursor
        response = self._call(
            "https://slack.com/api/conversations.list",
            params=params,
        )
        matched.extend(
            channel for channel in response["channels"] if channel["id"] == channel_id
        )
        # An absent or empty next_cursor means there are no further pages.
        cursor = (response.get("response_metadata") or {}).get("next_cursor")
        if not cursor:
            break
    return matched
メッセージの取得
def fetch_messages(self, channel_id: str) -> list[dict[str, Any]]:
    """Return all messages of a conversation via ``conversations.history``.

    Pages are requested 200 messages at a time and concatenated in the
    order the API returns them.

    Args:
        channel_id: ID of the conversation to read.

    Returns:
        The concatenated ``messages`` entries of all result pages.
    """
    messages: list[dict[str, Any]] = []
    next_cursor = None
    while True:
        params = {"channel": channel_id, "limit": 200}
        if next_cursor:
            params["cursor"] = next_cursor
        response = self._call("https://slack.com/api/conversations.history", params=params)
        messages.extend(response["messages"])
        if not response.get("has_more"):
            break
        next_cursor = (response.get("response_metadata") or {}).get("next_cursor")
        if not next_cursor:
            # Without a usable cursor the next request would fetch the first
            # page again and never terminate, so stop here.
            break
    return messages
スレッド内のメッセージの取得
def fetch_replies(self, channel_id: str, thread_ts: str) -> list[dict[str, Any]]:
    """Return the messages of one thread via ``conversations.replies``.

    Args:
        channel_id: ID of the conversation containing the thread.
        thread_ts: Timestamp (``ts``) of the thread's parent message.

    Returns:
        The thread's messages in the order the API returns them, with the
        parent message included only once.
    """
    replies: list[dict[str, Any]] = []
    next_cursor = None
    while True:
        payload = {"channel": channel_id, "limit": 200, "ts": thread_ts}
        if next_cursor:
            payload["cursor"] = next_cursor
        response = self._call("https://slack.com/api/conversations.replies", params=payload)
        for message in response["messages"]:
            # The parent may be repeated on later pages; keep only the first
            # copy instead of aborting and losing the rest of the page.
            if message["ts"] == thread_ts and replies:
                continue
            replies.append(message)
        # Stop on the API's own end-of-data signal rather than issuing an
        # extra request and waiting for the parent message to reappear.
        if not response.get("has_more"):
            break
        next_cursor = (response.get("response_metadata") or {}).get("next_cursor")
        if not next_cursor:
            break
    return replies
メイン関数
def main(
    token: str,
    output_dir: Path,
    output_format: Literal["json", "jsonl", "excel"] = "jsonl",
) -> None:
    """Export the messages and thread replies of each selected channel to a file.

    One file per channel is written to ``output_dir``, named after the
    channel (or, for direct messages, after the other user's name).

    Args:
        token: Slack API token passed to ``Client``.
        output_dir: Directory for the exported files; created if missing.
        output_format: ``"excel"`` writes an ``.xlsx`` workbook, ``"json"`` a
            single indented JSON array, ``"jsonl"`` one JSON object per line.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    client = Client(token)

    logger.info("Fetching users")
    users = {user["id"]: user for user in client.fetch_users()}
    logger.info(f"{len(users)} users fetched")

    logger.info("Fetching channels")
    channels = client.fetch_channels()
    logger.info(f"{len(channels)} channels fetched")

    for channel in channels:
        channel_id = channel["id"]
        # Fall back to the counterpart's user name when the channel object
        # carries no "name" of its own.
        channel_name = channel.get("name") or users.get(channel.get("user"), {}).get("name", "unknown_channel")
        logger.info(f"Fetching messages: {channel_name=}")
        messages = client.fetch_messages(channel_id)

        messages_and_replies = []
        fetched_threads: set[str] = set()
        # The history is walked in reverse of the order it was returned in
        # (presumably newest-first from the API, giving chronological output).
        for message in reversed(messages):
            thread_ts = message.get("thread_ts")
            if not thread_ts:
                messages_and_replies.append(message)
                continue
            # Several history entries can share one thread_ts (the parent plus
            # any reply that also shows up in the channel history); fetch each
            # thread only once so it is not exported repeatedly.
            if thread_ts in fetched_threads:
                continue
            fetched_threads.add(thread_ts)
            messages_and_replies += client.fetch_replies(channel_id, thread_ts)
        logger.info(f"{len(messages_and_replies)} messages/replies fetched")

        if output_format == "excel":
            output_path = f"{output_dir / channel_name}.xlsx"
            df = pd.DataFrame(messages_and_replies)
            df.to_excel(output_path, index=False)
        else:
            output_path = f"{output_dir / channel_name}.{output_format}"
            with open(output_path, "w", encoding="utf-8") as f:
                if output_format == "json":
                    json.dump(
                        messages_and_replies,
                        f,
                        indent=4,
                        ensure_ascii=False,
                        sort_keys=True,
                    )
                elif output_format == "jsonl":
                    for message in messages_and_replies:
                        json.dump(message, f, ensure_ascii=False, sort_keys=True)
                        f.write("\n")
スクリプトの実行
if __name__ == "__main__":
    # Run the export only when executed as a script, not when imported.
    # The first argument is the Slack API token; fill it in before running.
    main('', Path('output'), 'excel')