【備忘録】 R分析向けチートシート

Posted at 2025-03-16
R分析用のチートシート

*100%個人向けです
#for分を使った連続変数作成

years <- c(1960,1970,1980,1990,2000,2005,2010,2015,2018)

for (i in 1:length(years)){
  assign(
    paste("graph_ally_", years[i],sep=""), 
    df_ally %>%
      filter(year == years[i]) %>%
      rename(from = stateA_n, to = stateB_n) %>% 
      as_tbl_graph(directed = T)
  )
}


##########################################################
#1. 分析手法 #
##########################################################
library(broom) #回帰分析を表に

#robust標準誤差
library(estimatr)
model <- lm_robust(rata ~ dtreat*time + as.factor(year), 
                   clusters = id,
                   data = df)

iv_robust(new_empinxavg ~ EV | l2CPcol2, 
          data = df, 
          subset = year >= 1987, 
          fixed_effects = ~ year + ccode, 
          clusters = ccode) %>% 
  tidy(conf.int = TRUE)


#RDD estimate
library(rdd)
RDestimate(y~x, data = df, bw = "バンド幅", kernel = "rectangular")

#RDDの図
df %>% 
  filter(between(IVibrd.sc, -0.2, 0.2)) %>% 
  mutate(treatment = if_else(IVibrd.sc > 0, "treated", "controlled")) %>% 
  ggplot(aes(x = IVibrd.sc, y = diff_fh_polity2, colour = treatment)) + 
  geom_point() + 
  geom_smooth(se = FALSE)
ggsave("figures/Carnegie.png")

#IV regression(操作変数法)
library(AER)
ivreg(y ~ x + a + b + c | z + a + b + c, 
      data = df, subset = year >= 1987)
#robustなら
iv_robust(new_empinxavg ~ EV | l2CPcol2, 
          data = df, 
          subset = year >= 1987, 
          fixed_effects = ~ year + ccode, 
          clusters = ccode) %>% 
  tidy(conf.int = TRUE)

#Causal Impact 
library(CausalImpact)
#dfは一列目が必ずresponse variable
#pre.periodはdfがzooでは無い場合に、行番号で入力するのでyear列は不要
#なので一列目がresponse variable 2列目以降がcovariatesとなる。
impact <- CausalImpact(df, pre.period = c(6, 15), post.period = c(16, 43))

#Causal Impactの図
plot(impact)

#fixed effect model (固定効果モデル)
library(plm)
#pooling model (プーリングモデル)
plm(new_empinxavg ~ EV, data = df,
    model = "pooling")

#inidividual fixed model (主体固定効果モデル)
plm(new_empinxavg ~ EV, data = df,
    effect = "individual",
    model = "within",
    index = c("ccode", "year"))

#two-way fixed model (二元固定モデル)
plm(new_empinxavg ~ EV, data = df,
    effect = "twoway",
    model = "within",
    index = c("ccode", "year"))

#独立変数が二項分布(binomial distribution)に従う回帰モデル
glm(y ~ x1 + x2,
    family = binomial(),
    data = df) %>% 
  tidy()

#信頼区間のプロット
glm(y ~ x1 + x2,
    family = binomial(),
    data = df) %>% 
  tidy(conf.int = TRUE) %>% 
  filter(term != "(Intercept)") %>% 
  ggplot() + 
  geom_pointrange(aes(x = estimate, y = erm= conf.low, xmax = conf.high))+
  geom_hline(aes(yintercept = 0), linetype = "dashed")


#ベイズモデル
library(brms)
model <- brm(Outcome ~ Democracy + Military_Personnel + Iron_Steel + Initiator,
             family = binomial(),
             data = df)
#ベイズプロット
plot(model)

#交差項を用いたモデル
#交差項を入れたときは、解釈を誤りやすいので必ずプロットする！
#縦軸が処置効果
#横軸がモデレーター
library(interplot)
model <- lm(totApprov ~ treatment*ideology, 
            data = df_audience)
interplot(model, var1 = "treatment", var2 = "ideology") + 
  geom_hline(aes(yintercept = 0), linetype = "dashed") + 
  labs(x = "Ideology", y = "Conditional Effect")

#さらに細かい情報を足す（interplotの上位互換）
interflex(Y = "totApprov", D = "treatment", X = "ideology", 
          estimator = "binning", nbins = 4, data = df_audience %>% as.data.frame())

#ブートストラップを使ったより詳細なitnerplot
interflex(Y = "totApprov", D = "treatment", X = "ideology", 
          estimator = "kernel", data = df_audience %>% as.data.frame())


#限界効果モデルの作成(モデルは普通に作ってmargins()に引数として渡す)
library(margins)
model <- glm(Outcome ~ Democracy + Military_Personnel + Iron_Steel,
             family = binomial,
             data = df_victory)

model %>% 
  margins() %>% 
  summary()

#予測値のプロット（限界効果）
model %>% 
  cplot("Democracy", draw = FALSE) %>% 
  ggplot() + 
  geom_line(aes(x = xvals, y = yvals)) + 
  geom_ribbon(aes(x = xvals, ymin = lower, ymax = upper), alpha = 0.3)

#対数logを用いたモデル
#政治学の分野では、変数分布がきもかったら対数変換することがおおいっぽい
df_eu_aid %>% 　#対数分布のプロット（ゆがんだ分布を対数を取ることで治せる）
  ggplot() + 
  geom_histogram(aes(x = exp(EV))) + 
  scale_x_log10() 

#causal forestモデル
library(grf)
#まず予測モデルを作成
#yに結果変数、wにトリートメント変数、xに共変量を渡す
tau.forest <- causal_forest(Y = df_audience %>% 
                              pull(totApprov),
                            W = df_audience %>% 
                              pull(treatment),
                            X = df_audience %>% 
                              dplyr::select(gender, education, hispanic, ideology, income, age),
                            tune.parameters = "all",
                            num.trees = 50000)
#randam forestのATEを求める
average_treatment_effect(tau.forest, target.sample = "all")

#randam forestの結果の分布を確認する
tibble(tau = tau.forest$predictions) %>% 
  ggplot() + 
  geom_histogram(aes(x = tau))
# ggsave("figures/hte_distribution.png")

#random forestの中で最も当てはまりのよかったモデルの結果を見る
best_linear_projection(tau.forest, 
                       df_audience %>% 
                         dplyr::select(gender, education, hispanic, ideology, income, age))


#モデレータのみが変動するデータを作成
X.test <- tibble(gender = median(df_audience$gender), 
                 education = median(df_audience$education), 
                 hispanic = median(df_audience$hispanic), 
                 ideology = median(df_audience$ideology), 
                 income = median(df_audience$income),
                 age = 10:70)

#作成したテストデータでどのように変動するかを予測
tau.hat <- predict(tau.forest, X.test, estimate.variance = TRUE)
#分散を作成
sigma.hat <- sqrt(tau.hat$variance.estimates)

#予測値、信頼区間を作成し、プロット
tibble(tau = tau.hat$predictions,
       conf.low = tau.hat$predictions - 1.96 * sigma.hat,
       conf.high = tau.hat$predictions + 1.96 * sigma.hat,
       moderator = 10:70) %>% 
  ggplot() + 
  geom_line(aes(x = moderator, y = tau)) + 
  geom_ribbon(aes(x = moderator, ymin = conf.low, ymax = conf.high), alpha = 0.3) + 
  geom_hline(aes(yintercept = 0), linetype = "dashed")
# ggsave("figures/causal_forest.png")

#logisticモデル　ロジスティックモデル
library(tidymodels)#機械学習(caretも機械学習用)

model <- logistic_reg() %>%　#logistic回帰する機械学習
  set_engine("glm") %>%　#計算モデルは？
  set_mode("classification") %>% 　#回帰か分類か
  fit(outcome ~ ., #.は目的変数以外の全変数を指す
      data = df_power)#モデル式

#予測値と実地の比較
res <- tibble(outcome = df_power$outcome,#実際のカテゴリ
              predict(model, df_power),#予測したカテゴリ
              predict(model, df_power, type = "prob")[,1])#１になる確率

#的中率を計算
res %>% 
  accuracy(outcome, .pred_class)

res %>% 
  conf_mat(outcome, .pred_class)#混同行列(confusion matrix)を求める
#ラベルが偏っている場合には、求める正確性は高くなる

#モザイクプロット
res %>% 
  conf_mat(outcome, .pred_class) %>% 
  autoplot()

#マシューズ相関係数
res %>% 
  mcc(outcome, .pred_class)　

#decison tree(決定木モデル)
model <- decision_tree() %>% 
  set_engine("rpart") %>% 
  set_mode("classification") %>% 
  fit(outcome ~ ., 
      data = df_power)
#png("figures/decision_tree.png")
rpart.plot::rpart.plot(model$fit)
#dev.off()

model <- rand_forest() %>% #ランダムフォレスト
  set_engine("ranger") %>% 
  set_mode("classification") %>% 
  fit(outcome ~ ., 
      data = df_power)

res <- tibble(outcome = df_power$outcome,
              predict(model, df_power),
              predict(model, df_power, type = "prob")[,1])

res %>% 
  accuracy(outcome, .pred_class)

res %>% 
  conf_mat(outcome, .pred_class)

res %>% 
  mcc(outcome, .pred_class)#マシューズ相関係数

df_split <- initial_split(df_power, 0.8)

df_train <- training(df_split)
df_test <- testing(df_split)

model <- rand_forest() %>% 
  set_engine("ranger") %>% 
  set_mode("classification") %>% 
  fit(outcome ~ ., 
      data = df_train)

res <- tibble(outcome = df_test$outcome,
              predict(model, df_test),
              predict(model, df_test, type = "prob")[,1])

res %>% 
  accuracy(outcome, .pred_class)

res %>% 
  conf_mat(outcome, .pred_class)

res %>% 
  mcc(outcome, .pred_class)

res %>% 
  roc_curve(outcome, .pred_1) %>%
  ggplot(aes(x = 1 - specificity, y = sensitivity)) +
  geom_path() +
  geom_abline(lty = 3)
ggsave("figures/power_roc.png")

#read_csv("data/UNVotes.csv") %>% 
#   filter(year == 2018) %>% 
#  write_csv("data/data_vote.csv")

df_vote <- read_csv("data/UNVotes.csv") %>% 
  filter(year == 2018) %>%
  dplyr::select(Country, unres, vote) %>% 
  pivot_wider(Country, names_from = "unres", values_from = "vote")

rc_vote <- df_vote %>% 
  dplyr::select(-Country) %>% 
  as.matrix() %>% 
  rollcall(yea = 1, nay = 3, missing = c(2, 8),
           legis.names = df_vote$Country)

ipoint <- ideal(rc_vote)

df_ideal <- ipoint$xbar %>% 
  as.data.frame() %>% 
  mutate(name = df_vote$Country)

df_ideal %>% 
  ggplot() + 
  geom_density(aes(x = D1))
ggsave("figures/distribution_idealpoint.png")#イデオロギー分布

df_ideal %>% 
  ggplot() + 
  geom_text(aes(x = D1, y = 0, label = name),
            position = position_jitter())
ggsave("figures/country_idealpoint.png") #横軸イデオロギー

# read_excel("data/p5v2018.xls", na = c("-66", "-77", "-88")) %>% 
#   filter(year == max(year)) %>% 
#   drop_na(parcomp, parreg, xrcomp, xropen, xconst) %>% 
#   write_csv("data/data_polity.csv")

df_polity <- read_excel("data/p5v2018.xls", na = c("-66", "-77", "-88")) %>% 
  filter(year == 2018) %>%
  drop_na(parcomp, parreg, xrcomp, xropen, xconst)%>%
  write.csv("data/data_polity.csv")

df_polity <- read_csv("data/data_polity.csv") %>% 
  mutate(parcomp = case_when(parcomp == 5 ~ 6,
                             parcomp == 4 ~ 5,
                             parcomp == 3 ~ 4,
                             parcomp == 2 ~ 2,
                             parcomp == 1 ~ 1,
                             parcomp == 0 ~ 3),
         parreg = case_when(parreg == 5 ~ 3,
                            parreg == 4 ~ 3,
                            parreg == 3 ~ 2,
                            parreg == 2 ~ 1,
                            parreg == 1 ~ 3),
         xrcomp = case_when(xrcomp == 3 ~ 4,
                            xrcomp == 2 ~ 3,
                            xrcomp == 1 ~ 1,
                            xrcomp == 0 ~ 2),
         xropen = case_when(xropen == 4 ~ 6,
                            xropen == 3 ~ 5,
                            xropen == 2 ~ 2,
                            xropen == 1 ~ 1,
                            xropen == 0 ~ 4,
                            xropen == 4 ~ 3))

pca <- princomp(~ parcomp + parreg + xrcomp + xropen + xconst, data = df_polity)
summary(pca)

df_polity <- df_polity %>% 
  mutate(comp1 = pca$scores[,1],
         comp2 = pca$scores[,2])

df_polity %>% 
  count(comp1, comp2, polity) %>% 
  filter(n != 0) %>% 
  ggplot() + 
  geom_point(aes(x = comp1, y = comp2, size = n, colour = polity))
ggsave("figures/polity5_pca.png")

loadings(pca)

kclust <- df_polity %>% 
  dplyr::select(parcomp, parreg, xrcomp, xropen, xconst) %>% 
  kmeans(center = 5)

df_polity %>% 
  mutate(clust = kclust$cluster) %>% 
  count(democ, autoc, clust) %>% 
  ggplot() + 
  geom_jitter(aes(x = democ, y = autoc, size = n, colour = as.factor(clust)))
# ggsave("figures/polity5_kmeans.png")

mix <- df_ideal %>% #混合ガウス分布は、k-meansの確率版
  dplyr::select(D1) %>% 
  Mclust()

df_ideal %>% 
  mutate(group = mix$classification) %>% 
  ggplot() + 
  geom_text(aes(x = D1, y = 0, label = name, colour = as.factor(group)),
            position = position_jitter())
ggsave("figures/vote_mixture.png")

#変数を正規表現で一括削除
rm(list=ls(pattern="model_.*"))

#global 
assign(a, b, envir = .GlobalEnv)




library(tidyverse)

## install rtweet from CRAN
install.packages("rtweet")
install.packages("tidyverse")

## load rtweet package
path.package("rtweet")
find.package("ggplot2")
library(rtweet)
##################
#時系列変換関数
#列に年度が格納されているデータをプロットできる形式に変換する関数を作成
#dfにはもともとの形のデータフレーム, 
#observationに国など、列に年度、valueとして価格などデータをもっている形
#spanには使用する年度を格納する。
#seq(2008,2019, by = 1)など
##################################################################
to_tseries <- function(df, span){
  #統合用に一国分の時系列データ枠を作成
  timedf <- data.frame(Time = span,
                       Country = df[1,1],
                       Price = NA)
  
  for (i in 1:nrow(df)){
    #i国の時系列データ枠を作成
    a <- data.frame(Time = span,
                    Country = df[i,1],
                    Price = NA)
    #jこの時系列情報を格納
    for (j in 1:length(span)){
      a[j,3] <- df[i,j+1]
    }
    #国の情報を統合
    timedf <- rbind(timedf,a)
  }
  #最初に統合用に使った部分を削除
  timedf <- slice(timedf, -c(1:length(span)))
  
  #xts(timedf,order.by = timedf$Time) %>%
  return(timedf)
}


##### dataframe manipulation (データフレーム操作) ####

#特定列の抽出
x <- df %>%
  dplyr::select("列名１","列名２")

#wider dataframe (データフレームを横長にする)
x <- df %>%
  pivot_wider(names_from = "新しく列にする列名", values_from = "動かしたい値が入っている列名")

#longer dataframe(データフレームを縦長にする)
#colsには、動かしたい列名を入れる（ただし、動かしたくない列を-で指定したほうが楽なことも多い。）
#names_toは、動かした列名を束ねる列名を入力する。
#たとえば、列名が年だった場合には、names_to = "year"と指定するとよい。
#values_toは、動かしたい値を束ねる列名を入力する。
#たとえば、gdpなど
x <- df %>% 
  pivot_longer(cols = -regionname, names_to = "year", values_to = "gdpcap")


#左側（右側にデータを結合する/mergeの特殊系）
df <- leftjoin(x,y, by  = "year")

#日付・時間・時系列を文字列から時間変数に。
library(lubridate)

fas_body_agg <- fas_body_agg %>%  
  mutate(year = lubridate::year(created_at)) %>%
  mutate(month = lubridate::month(created_at)) %>%
  mutate(day = lubridate::day(created_at))


#データフレーム内の検索と置換(置換はgsub)
df <- mutate(df,列名=gsub(列名, pattern="検索する文字列",
                          replacement = "置き換える文字列", 
                          ignore.case = TRUE))

#データフレーム内の検索と抽出(抽出はgrep)
df <- df[grep(pattern = "正規表現",
              x = "列名（文字列ベクトル）",
              ignore.case = FALSE, #(大文字を区別),
              value = FALSE),] #Falseならベクトルの番号を、Trueなら値を返す



#csv一括読み込み
csvlist <- list.files(path = "C:/Users/yuhij/OneDrive/デスクトップ/論文関係/自学/OBR/", pattern = "result", full.names = TRUE)
res_tbl <- sapply(csvlist, read.csv, simplify = FALSE) %>%
  bind_cols() %>% 
  data.frame()

#行削除
df[,-c(2,5)]
#列削除
df[-c(2,5),]

#データフレームに列を挿入
mutate(df, col_name = func)
#または
df$colname <- func

#NAを含む列番号をdfから取得
na_index <- which(is.na(df$col))

#na_indexを利用してNAを含む列を削除
df <- df[-na_index,]

#na_indexを利用してNAをmedianに置き換え
df$col[na_index] <- median(df$col, na.rm = TRUE) 

#重複行の削除
df %>%
  distinct(列名：複数OK＆cでくくる必要なし)

#データの統合(merge)
#innerはAかつB、outerはAまたはB、rightはdf1に合わせる、leftはdf2に合わせる
merge(df1, df2, join = "inner|outer|right|left")

#コンソールにすべてのデータを表示
df %>%
  select(列名) %>%
  tibble(options('tibble.print_max' = 1000))


#######################################################

#ランダムサンプリング
# Set seed of 567
set.seed(567)

# Store row numbers randomly (2/3) : index_random
index_random <- sample(1:nrow(df), 2 / 3 * nrow(df))

# Create training set: training_set
random_selected <- df[index_random, ]

#######################################################

#データフレームの並び替え
df[order(df$prob, decreasing=T),]

#データフレームからvectorを作る（selectだとtibbleになる）
df %>% pull(変数)



#相関関係を図で一覧する
pairs(df)

#ロジット回帰分析(1説明変数)
# Build a glm(general liner model) model with variable ir_cat as a predictor
log_model <- glm(explained_col ~ explanatory_col, family = binomial(link = "logit"|"probit"|"cloglog"), data = df)
log_model %>% 
  summary()

#ロジット回帰分析（多説明変数）
log_model <- glm(explained_col ~ explanatory_col1 + explaatory_col2, family = "binomial", data = df)
log_model %>% 
  summary()

#ロジット回帰分析に基づく予測
# Build the logistic regression model
#log_modelを作成したデータに基づき、newdataに入れたデータの予測を行う
predictions_all_small <- predict(log_model, newdata = optional, type = "response")

#左右15%の除去
ifelse(predictions_all_small > 0.15, 1, 0)

#time series ####################################################
#時系列データを作成(xtsは時間情報がIndexとなっている)
library(xts)
matrix <- as.xts(df, order.by = as.Date(df$col,"%d/%m/%Y"))
#リストの場合
matrix <- as.xts(list, order.by = Index)


#特定の時間を時間として認識
As.Date()


#特定の時間を抽出
#2016/01 ~ 2016/03/12
time <- x["2016-01/2016-03-12"]
#2016 am 10:00 ~ 11:00
time <- irreg("T10:00/T11:00")
matrix[time]

#naを最も近い時間(前）データの値で置き換える
xts_matrix <- na.locf(xts_matrix)
#naを最も近い時間(前）データの値で置き換える
xts_matrix <- na.locf(xts_matrix, fromLast = TRUE)
#naを欠損する前と後の時間の平均となるように置き換える
xts_matrix <- na.approx(xts_matrix)

#データベースからフィルタリングして抽出
filter(df, 条件)

#周期を取得
periodicity(xts)
#周期を変更
xts <- to.period(xts, period = "months", OHLC = FALSE, indexAt = "firstof")
#年月日の合計数を表示
nmonths(xts)

#日付の書き方
"20160622/20180611"

#ラグ処理(1周期遅らせる場合)
lag(xts, k = 1)

#xts plot 時系列プロット
#特定の列をプロット
plot.xts(flights_xts$total_flights)

#複数列（前列）をプロット(分割か一緒か）
plot.zoo(flights_xts, plot.type = "multiple|single", lty = lty)
legend("right", lty = lty, legend = labels)

#複数列（特定列）をプロット
plot.zoo(x = xts[ , c("___", "___", "___")])

#時間を比較するため、2つの図を縦に並べてプロット
par(mfrow = c(2,1))
plot.xts()
plot.xts()

#時系列で出力(Csv)
write.zoo(xts, file = ".csv", sep = ",")

#ランク付け
df$rank <- NA
for (i in unique(df$Year)) {
  df$rank[df$Year == i] <- rank(df$Value[df$Year == i])
}

##########################################
#ggplot
##########################################

#基本構文(data spaceを作成)
ggplot(data = df, aes(y = explained, x = explanatory, color = "red")) +
  #点をプロット
  geom_point() +
  #点をずらして表示
  geom_jitter(height = 0.05, width = 0, alpha = 0.5) +
  #直線回帰線をプロット
  geom_smooth(method = "lm", se = FALSE) +
  #Logistic 回帰線をプロット
  geom_smooth(method = "glm", se = FALSE, method.arg = list(family = "binomial")) 

#相関図を全変数作成
library(GGally)
my_fn <- function(data, mapping, method="p", use="pairwise", ...){
  
  # grab data
  x <- eval_data_col(data, mapping$x)
  y <- eval_data_col(data, mapping$y)
  
  # calculate correlation
  corr <- cor(x, y, method=method, use=use)
  
  # calculate colour based on correlation value
  # Here I have set a correlation of minus one to blue, 
  # zero to white, and one to red 
  # Change this to suit: possibly extend to add as an argument of `my_fn`
  colFn <- colorRampPalette(c("blue", "white", "red"), interpolate ='spline')
  fill <- colFn(100)[findInterval(corr, seq(-1, 1, length=100))]
  
  ggally_cor(data = data, mapping = mapping, ...) + 
    theme_void() +
    theme(panel.background = element_rect(fill=fill))
}
variables %>%
  ggpairs(upper = list(continuous = my_fn), lower = list(continuous = "smooth"))  

#よく使うテンプレ
#factorの順番を任意に並び替える（デフォルトはアルファベット）
df_compare$Label <- factor(df_compare$Label, levels = c("Houston","Chicago","Texas","Illinois","Nation"))
sumt %>%
  filter(TIME >=2004) %>%
  ggplot(aes(x=TIME, y = VRE_SHARE))+
  facet_wrap(.~GEO) +
  geom_line()+
  theme_classic() + #かっこよくする（背景白）
  geom_vline(xintercept = c(2008),linetype = "dashed") +#垂直線を追加
  scale_x_continuous(breaks = seq(2004,2019,by=2),"Year") +
  theme(axis.text.x=element_text(angle = -60, hjust=0, vjust = 0))+
  scale_fill_discrete(labels=c("MINOR","WAR"))+ #レジェンドの名前を変更
  scale_colour_discrete(limits = c("farright", "other populist", "farleft")) #レジェンドの表示順を変
  theme(axis.text.x=element_blank())+ #x軸の目盛り消す
  scale_x_discrete(labels =c("単純小選挙区","絶対多数","完全連記","Party Block Vote","小選挙区比例代表並立A","小選挙区比例代表並立B","比例代表（7人未満）", "比例代表（７人以上）", "選好投票","非選好投票","制限投票","ボルダ方式"))+
  ggtitle("aaaa") #タイトル
guides(fill = FALSE) #レジェンドをけす　
labs(x=attr(V8_Scale, "label"), #x軸,y軸,レジェンドの変更
     y=attr(V16, "label"),
     colour=attr(V4_Scale, "label"))

scale_color_discrete(labels=c("95%","90%"))+ #判例のラベルを変更
  
  scale_fill_manual(values = Colours) #色付け
ggsave("VRE_Share_all.png")

#動くラインプロット
df %>%
  filter(Label=="Reported") %>%
  ggplot(aes(x=Year, y=Value, colour = id)) +
  #facet_wrap(.~id,ncol = 3) +
  geom_line() +
  theme_classic() + #かっこよくする（背景白）
  ggtitle("各警察管轄の犯罪数")+
  transition_reveal(Year)

#動くタイルプロット
df %>%
  ggplot(aes(x = rank, group = id)) +
  geom_tile(aes(y = Value/2,
                height = Value,
                fill = id,
                width = 0.9)) +
  geom_text(aes(y = 0, label = paste(id, " ")), vjust = -1, hjust = -0.1) +
  geom_text(aes(y = Value, label = paste0(" ", Value), vjust = 1, hjust = 0)) +
  scale_x_reverse() +
  coord_flip() +
  theme_light()+
  transition_states(Year)


################################################################
#データ収集（e-statのAPIを使用）
################################################################
library(keyring)
library(httr)
library(listviewer)
library(rlist)


keyring::key_set("e-stat") #ここにApplication IDを入力

########################################################
#どんなデータベースがあるのかざっと見る
########################################################

response <- GET(
  url = "https://api.e-stat.go.jp/rest/3.0/app/json/getStatsList",
  query = list(
    appId = keyring::key_get("e-stat"), #ここにID入力
    searchWord = "犯罪統計 AND 刑法犯" #検索したいワードを入力
  )
)

res_content <- content(response)
jsonedit(res_content) #中身の確認

res_content$GET_STATS_LIST$DATALIST_INF$TABLE_INF %>% 
  list.select(id = `@id`,
              table_name = TITLE_SPEC$TABLE_NAME
  ) %>% 
  list.stack() #listをdataframe形式で出力

########################################################
#調べた中からおもしろそうなものを実際に取得
########################################################
response <- GET(
  url = "https://api.e-stat.go.jp/rest/3.0/app/getSimpleStatsData",
  query = list(
    appId = keyring::key_get("e-stat"),
    statsDataId = "0003191320" #欲しい統計表に対応するIDを入力
    
  )
)
response

#csvとして読み込み
res_content <- content(response) %>% 
  read.csv(text = sub('(?s).*"VALUE"\n', "", res_content, perl = TRUE))
You get articles that match your needs
You can efficiently read back useful information
You can use dark theme
What you can do with signing up