LoginSignup
1
1

More than 1 year has passed since last update.

時系列データを用いたtwitterトロールのネットワーク分析

Posted at

前書き

Twitterトロールがツイートを重ねながらフォロワーを増やし、影響力を増していく様子をアニメーションとして可視化した。

ほとんどのアカウントは孤立しており、同類のアカウントとの連携は見られなかった。

インポート

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import networkx as nx
import copy
from uuid import uuid4
from PIL import Image
import glob
from google.colab import drive
import time
from IPython.display import Image as getImage


# Mount Google Drive inside the Colab VM so the dataset and output folders
# are reachable through the regular file system.
drive.mount('/content/drive/')

# Every directory used by this notebook lives under the same Drive root.
drive_root = '/content/drive/My Drive/'
data_dir = 'Colab Notebooks/dataset/'
work_dir = 'Colab Notebooks/workspace/'

imp_dir = drive_root + data_dir + 'russian_twitter_trolls/'  # input CSV files
img_dir = drive_root + work_dir + 'img/'                     # per-frame PNG output
gif_dir = drive_root + work_dir + 'export/'                  # final animated GIF

CSVファイルの読み込み

# Load the tweet table (dft) and the user table (dfu) from the dataset folder,
# then print each table's (rows, columns) as a quick sanity check.
dft, dfu = (
    pd.read_csv(imp_dir + csv_name)
    for csv_name in ('tweets.csv', 'users.csv')
)

for frame in (dft, dfu):
    print(frame.shape)
(203482, 16)
(454, 14)

データ整形

# --- Users: keep only the id/name pair and drop rows missing either one. ---
dfu = dfu[['id', 'name']].dropna(how='any')

# --- Tweets: keep the columns used downstream. ---
# .copy() makes this an independent frame so the column assignments below
# do not write into a view of the original (SettingWithCopyWarning).
dft = dft[['user_id', 'created_str', 'retweet_count', 'retweeted', 'text',
           'tweet_id', 'retweeted_status_id', 'in_reply_to_status_id']].copy()

# Parse the timestamp and snap it to the nearest whole day.
# * No explicit format is passed: the day-rounding implies created_str may
#   carry a time-of-day part, which '%Y-%m-%d' cannot match (recent pandas
#   enforces the format strictly). NOTE(review): confirm against the CSV.
# * Rounding goes through the .dt accessor, the documented datetime API;
#   plain Series.round() is meant for numeric decimals.
dft['created_str'] = pd.to_datetime(dft['created_str']).dt.round('1d')

# Rows without an author, a date or a retweet count cannot be plotted.
dft = dft.dropna(subset=['user_id', 'created_str', 'retweet_count'])

# Tweets that were never retweeted contribute no edges to the network.
dft = dft[dft['retweet_count'] != 0].copy()

# The NaN-bearing columns were read as floats; restore integer ids/counts.
# int64 is spelled out so the width does not depend on the platform default.
dft['retweet_count'] = dft['retweet_count'].astype('int64')
dft['user_id'] = dft['user_id'].astype('int64')

# Chronological order is what the frame-by-frame rendering iterates over.
dft = dft.sort_values('created_str')

描画

# Render the animation frames: one PNG per distinct day, each showing the
# tweets that fall inside a sliding window of the most recent WINDOW_DAYS
# distinct days.
#
# Every tweet in the window becomes a star: one orange hub node for the tweet
# (labelled with its author id) linked to one sky-blue leaf node per retweet.
WINDOW_DAYS = 4

# Iterate over distinct days, not over tweet rows. Looping over the raw
# column produced one frame per tweet and, because dates repeat, windows
# whose lower and upper bounds were the same day (i.e. empty frames).
days = dft['created_str'].drop_duplicates().sort_values().reset_index(drop=True)

for n, day in enumerate(days, start=1):
    idx = n - 1

    # Upper bound: the current day. Lower bound (exclusive): the day
    # WINDOW_DAYS positions earlier, once that many days have elapsed.
    in_window = dft['created_str'] <= day
    if idx >= WINDOW_DAYS:
        in_window = in_window & (dft['created_str'] > days.iloc[idx - WINDOW_DAYS])
    dft_new = dft[in_window]

    # Collect edges and colours in plain Python containers; growing a
    # DataFrame row by row is quadratic and DataFrame.append no longer
    # exists in pandas 2.x.
    edges = []
    node_colors = {}
    for userid, count in zip(dft_new['user_id'], dft_new['retweet_count']):
        # One hub per tweet. The uuid suffix keeps separate tweets by the
        # same account from collapsing into a single node.
        hub = str(userid) + '_' + str(uuid4())
        node_colors[hub] = 'darkorange'
        for _ in range(count):
            leaf = str(uuid4())
            edges.append((leaf, hub))
            node_colors[leaf] = 'skyblue'

    G = nx.Graph()
    G.add_edges_from(edges)

    # Look colours up per node, in the graph's own node order, so the colour
    # list always has the same length and ordering as the drawn nodes.
    colors = [node_colors[node] for node in G.nodes()]

    plt.clf()  # start every frame from an empty figure
    nx.draw(G, with_labels=False, node_color=colors, alpha=0.5, node_shape="o", linewidths=4)
    plt.savefig(img_dir + '{0:04d}.png'.format(n))
    #plt.show()

gif作成

# Assemble the saved PNG frames (in file-name order) into one looping GIF.
files = sorted(glob.glob(img_dir+'*.png'))
if not files:
    # Fail with a clear message instead of an IndexError on images[0].
    raise FileNotFoundError('no PNG frames found in ' + img_dir)

# Image.open() keeps the underlying file open until the image is closed.
# Copy each frame into memory and close its file right away so that a long
# animation does not hold one open file handle per frame.
images = []
for file in files:
    with Image.open(file) as frame:
        images.append(frame.copy())

# duration is the display time per frame in milliseconds; loop=0 repeats forever.
images[0].save(gif_dir+'net_movie.gif', save_all=True, append_images=images[1:], duration=400, loop=0)

gif読み込み

# Display the generated animation inline in the notebook.
# NOTE(review): format='png' is passed although the file is a GIF —
# presumably a workaround for IPython versions that reject 'gif'; confirm.
gif_path = gif_dir + 'net_movie.gif'
getImage(gif_path, format='png')

Unknown.png

1
1
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
1