前書き
Twitterトロールがツイートを重ねながらフォロワーを増やし、影響力を増していく様子をアニメーションとして可視化した。
ほとんどのアカウントは孤立しており、同類のアカウントとの連携は見られなかった。
インポート
import copy
import datetime as dt
import glob
import os
import time
from uuid import uuid4

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from google.colab import drive
from IPython.display import Image as getImage
from PIL import Image
# Mount Google Drive so the dataset and the output folders can be reached
# through the /content/drive/ mount point.
drive.mount('/content/drive/')

# Folder layout, relative to the Drive root.
data_dir = 'Colab Notebooks/dataset/'
work_dir = 'Colab Notebooks/workspace/'

# Absolute locations: input CSVs, per-frame PNGs, and the exported GIF.
imp_dir = '/content/drive/My Drive/' + data_dir + 'russian_twitter_trolls/'
img_dir = '/content/drive/My Drive/' + work_dir + 'img/'
gif_dir = '/content/drive/My Drive/' + work_dir + 'export/'

# Create the output folders up front: saving a frame or the GIF into a
# directory that does not exist would otherwise fail only once the slow
# rendering loop has already started.
for out_dir in (img_dir, gif_dir):
    os.makedirs(out_dir, exist_ok=True)
CSVファイルの読み込み
# Load the tweet table (dft) and the user table (dfu) from the dataset
# folder, then print each frame's (rows, columns) as a quick sanity check.
dft, dfu = (
    pd.read_csv(imp_dir + csv_name)
    for csv_name in ('tweets.csv', 'users.csv')
)
print(dft.shape, dfu.shape, sep='\n')
(203482, 16)
(454, 14)
データ整形
# Users: keep only the identifier and the display name, and drop rows in
# which either one is missing.
dfu = dfu[['id', 'name']].dropna(how='any')

# Tweets: keep the columns of interest.  The explicit .copy() makes this an
# independent frame, so the column assignments below do not trigger
# SettingWithCopyWarning or write into a temporary slice.
dft = dft[['user_id', 'created_str', 'retweet_count', 'retweeted', 'text',
           'tweet_id', 'retweeted_status_id', 'in_reply_to_status_id']].copy()

# Parse the timestamps.  No explicit format is given: '%Y-%m-%d' does not
# describe values that also carry a time of day (which the day rounding
# below implies), and strict parsers reject such a mismatch.
dft['created_str'] = pd.to_datetime(dft['created_str'])

# Bucket every timestamp to a whole day through the datetime accessor;
# plain Series.round() is the numeric rounding method and is not reliable
# for datetime columns.
# NOTE(review): round('1d') goes to the *nearest* midnight, so a tweet
# posted after noon lands on the following day -- confirm that this, and
# not floor('1d'), is what is wanted.
dft['created_str'] = dft['created_str'].dt.round('1d')

# Rows missing the author, the date or the retweet count cannot be drawn.
dft = dft.dropna(subset=['user_id', 'created_str', 'retweet_count'])

# Tweets with a retweet count of zero are excluded.
dft = dft[dft['retweet_count'] != 0]

# With the NaNs gone, both columns can be converted to integers.
dft = dft.astype({'retweet_count': 'int', 'user_id': 'int'})

# Chronological order for the frame-by-frame rendering.
dft = dft.sort_values('created_str')
描画
def _window_rows(tweets, days, pos, span=4):
    """Return the tweets drawn in the frame for ``days[pos]``.

    The first ``span + 1`` frames show every tweet up to and including the
    frame's day.  Later frames show only tweets dated after
    ``days[pos - span]`` and up to ``days[pos]``.

    Args:
        tweets: DataFrame with a ``created_str`` datetime column.
        days: Ascending list of the distinct values of ``created_str``.
        pos: Index into ``days`` of the frame being drawn.
        span: How many steps back in ``days`` the window reaches.
    """
    created = tweets['created_str']
    not_after = created <= days[pos]
    if pos <= span:
        return tweets[not_after]
    return tweets[not_after & (created > days[pos - span])]


def _retweet_tables(rows):
    """Return ``(edges, colours)`` describing one frame's graph.

    Every tweet becomes a 'darkorange' hub node linked to one 'skyblue'
    node per retweet.  ``edges`` is a list of ``[from, to]`` pairs and
    ``colours`` a list of ``[node_id, colour]`` pairs.
    """
    edges = []
    colours = []
    for user_id, retweets in zip(rows['user_id'], rows['retweet_count']):
        # One hub per tweet.  The uuid suffix keeps hubs of different tweets
        # by the same user apart.
        # NOTE(review): previously a new hub id was generated for every
        # retweet while its colour was recorded only for the first one, so
        # the colour table was shorter than the node list whenever
        # retweet_count > 1.  A single hub per tweet is what the
        # "first iteration only" colour entry suggests -- confirm.
        hub = str(user_id) + '_' + str(uuid4())
        colours.append([hub, 'darkorange'])
        for _ in range(retweets):
            member = str(uuid4())
            edges.append([member, hub])
            colours.append([member, 'skyblue'])
    return edges, colours


# One frame per distinct day.  Iterating the raw column instead yields one
# frame per tweet: tweets sharing a day repeat the same frame, and the
# look-back can then compare a day with itself and select nothing.
df_day = dft['created_str'].drop_duplicates().sort_values().tolist()

n = 1  # running frame number, used in the PNG file name
for counter in range(len(df_day)):
    dft_new = _window_rows(dft, df_day, counter)

    # Collect all rows first and build each DataFrame once; growing a frame
    # row by row is quadratic, and DataFrame.append is no longer available
    # in current pandas.
    edge_rows, colour_rows = _retweet_tables(dft_new)
    net = pd.DataFrame(edge_rows, columns=['from', 'to'])
    carac = pd.DataFrame(colour_rows, columns=['ID', 'color'])

    # Build the graph.
    G = nx.from_pandas_edgelist(net, 'from', 'to')

    # Look the colour up per node, in the graph's own node order, so the
    # colour list always lines up with the nodes being drawn.
    colour_of = dict(zip(carac['ID'], carac['color']))
    node_colours = [colour_of[node] for node in G.nodes]

    # Draw on a cleared figure and save the frame next to the others, in
    # the same folder the GIF step reads from.
    plt.clf()
    nx.draw(G, with_labels=False, node_color=node_colours, alpha=0.5,
            node_shape="o", linewidths=4)
    plt.savefig(img_dir + '{0:04d}.png'.format(n))
    n += 1
gif作成
# Collect the rendered frames in file-name order; the zero-padded frame
# numbers make lexicographic order equal to rendering order.
files = sorted(glob.glob(img_dir + '*.png'))
if not files:
    # Fail with a clear message instead of an IndexError on images[0].
    raise FileNotFoundError('no PNG frames found in ' + img_dir)

images = [Image.open(path) for path in files]
try:
    # The first frame carries the rest as appended frames: 400 ms per
    # frame, looping forever (loop=0).
    images[0].save(gif_dir + 'net_movie.gif', save_all=True,
                   append_images=images[1:], duration=400, loop=0)
finally:
    # Release the file handles of every frame, even if saving failed.
    for image in images:
        image.close()
gif読み込み
# Display the exported GIF (shown inline when this is the last expression
# of a notebook cell).
# NOTE(review): format='png' is passed although the file is a GIF --
# presumably a workaround for an IPython version that refuses to embed
# GIFs; confirm before changing it.
getImage(gif_dir+'net_movie.gif', format='png')