図の右上のshow
ボタンを押すとRのコードが表示されます。
library(conflicted)
library(tidyverse)
data.frame(
year = 1990:2022,
population = c(
1.23611, 1.24101, 1.24567, 1.24938, 1.25265, 1.2557, 1.25859, 1.26157,
1.26472, 1.26667, 1.26926, 1.27316, 1.27486, 1.27694, 1.27787, 1.27768,
1.27901, 1.28033, 1.28084, 1.28032, 1.28057, 1.27834, 1.27593, 1.27414,
1.27237, 1.27095, 1.27042, 1.26919, 1.26749, 1.26555, 1.26146, 1.25502,
1.24947
)
) |>
ggplot(aes(x = year, y = population)) +
geom_line() +
geom_point() +
labs(x = "西暦", y = "日本の総人口 [億人]",
title = "時間遷移を折れ線グラフで描画する") +
theme(aspect.ratio = 3/5) #縦横比
library(conflicted)
library(tidyverse)
data.frame(
year = 1990:2022,
population = c(
1.23611, 1.24101, 1.24567, 1.24938, 1.25265, 1.2557, 1.25859, 1.26157,
1.26472, 1.26667, 1.26926, 1.27316, 1.27486, 1.27694, 1.27787, 1.27768,
1.27901, 1.28033, 1.28084, 1.28032, 1.28057, 1.27834, 1.27593, 1.27414,
1.27237, 1.27095, 1.27042, 1.26919, 1.26749, 1.26555, 1.26146, 1.25502,
1.24947
)
) |>
mutate(next_year_population = lead(population)) |> # 次の年の人口列を追加
drop_na() |> # 最後の行を削除(次の年の値がないため
ggplot(aes(x = population, y = next_year_population)) +
geom_point() +
labs(
x = "ある年の日本の総人口 [億人]",
y = "次の年の日本の総人口 [億人]",
title = "日本の総人口の前年比較"
) +
coord_cartesian(xlim = c(1.23, 1.29), ylim = c(1.23, 1.29)) +
theme(aspect.ratio = 1) #縦横比
library(conflicted)
library(tidyverse)
library(patchwork)
# データの定義
data <- data.frame(
t = 0:100,
x_t = c(
0.2, 0.64, 0.9216, 0.28901376, 0.821939226,
0.585420539, 0.970813326, 0.113339247, 0.401973849, 0.961563495,
0.14783656, 0.503923646, 0.99993842, 0.000246305, 0.000984976,
0.003936025, 0.015682131, 0.061744808, 0.231729548, 0.712123859,
0.820013873, 0.590364483, 0.967337041, 0.126384362, 0.441645421,
0.986378972, 0.053741981, 0.203415122, 0.648149641, 0.912206736,
0.320342428, 0.870892628, 0.449754634, 0.989901613, 0.039985639,
0.153547151, 0.519881693, 0.998418873, 0.006314507, 0.025098538,
0.097874404, 0.35318002, 0.913775574, 0.315159096, 0.863335361,
0.471949661, 0.996852714, 0.012549522, 0.049568127, 0.188444511,
0.611732709, 0.950063207, 0.189772438, 0.61503544, 0.94706739,
0.200522995, 0.641254094, 0.920189124, 0.293764402, 0.829867512,
0.564749697, 0.983229907, 0.065955428, 0.246421239, 0.742791249,
0.764209638, 0.720773069, 0.805037009, 0.627809693, 0.934658729,
0.244287156, 0.738443765, 0.772578283, 0.702804318, 0.835481634,
0.549808293, 0.990076536, 0.039299956, 0.151021877, 0.512857079,
0.999338782, 0.002643123, 0.010544547, 0.041733437, 0.15996703,
0.537510318, 0.994371904, 0.022385682, 0.087538253, 0.319501229,
0.869680775, 0.4533445, 0.991293057, 0.034524528, 0.13333034,
0.462213442, 0.994288704, 0.022714708, 0.088795, 0.323641792,
0.87559113
)
) |>
mutate(lag_x_t = dplyr::lag(x_t))
p1 <- data |>
ggplot(aes(x = t, y = x_t)) +
geom_line() +
geom_point() +
labs(
x = expression(italic(t)),
y = expression(italic(x[t])),
title = "折れ線グラフによる可視化"
) +
theme(aspect.ratio = 4/5) #縦横比
p2 <- data |>
drop_na() |>
ggplot(aes(x = lag_x_t, y = x_t)) +
geom_point() +
labs(
x = expression(italic(x[t])),
y = expression(italic(x[t+1])),
title = "散布図による可視化"
) +
theme(aspect.ratio = 4/5) #縦横比
# 描画レイアウトの設定と表示
p1 + p2
library(conflicted)
library(tidyverse)
library(mvtnorm)
library(patchwork)
# 個体の位置データ
set.seed(0)
individual <- matrix(abs(rnorm(200)), ncol = 2)
# 個体1のデータ生成
individual1 <- individual[,1] / sum(individual[, 1])
# 個体2のデータ生成
individual2 <- individual[, 1] * 0.6 + individual[, 2] * 0.4
individual2 <- individual2 / sum(individual2)
df <- data.frame(個体X = individual1, 個体Y = individual2)
p1 <- df |>
pivot_longer(everything()) |>
mutate(X2 = rep(c(1:100), 2)) |>
ggplot(aes(x = X2, y = value, fill = name)) +
geom_col() +
labs(
x = "滞在した場所ID",
y = "滞在時間割合",
title = "2個体の「行動」がどれだけ似ているか"
) +
theme(
aspect.ratio = 1/2,
legend.title = element_blank(),
legend.direction = "horizontal",
legend.position = c(0.5, 0.85)
)
p2 <- df |>
ggplot(aes(x = 個体X, y = 個体Y)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
labs(x = "個体Xの行動データ", y = "個体Yの行動データ") +
theme(aspect.ratio = 1) #縦横比
p1 + p2
library(conflicted)
library(tidyverse)
library(patchwork)
# ヒートマップを生成
# 2変量正規分布からデータ生成
x <- rmvnorm(
n = 100,
mean = c(0, 0),
sigma = matrix(c(1, 0.8, 0.8, 1), nrow = 2)
)
# 正規化関数
my_std <- function(x) {
(x - min(x)) / (max(x) - min(x))
}
# 正規化実行
x1 <- my_std(x[, 1])
x2 <- my_std(x[, 2])
# ヒートマップ用のデータ分割
heatmap1 <- matrix(x1, nrow = 10, ncol = 10)
heatmap2 <- matrix(x2, nrow = 10, ncol = 10)
# ヒートマップ描画
hm1 <- heatmap1 |>
as.data.frame() |>
pivot_longer(everything()) |>
mutate(name2 = paste0("W", sort(rep(1:10, 10)))) |>
ggplot(aes(x = name, y = name2, fill = value)) +
geom_tile() +
scale_fill_gradientn("value", colours = gray.colors(12, start = 0, end = 1, gamma = 2.2, alpha = 1)) +
theme(
aspect.ratio = 1,
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
legend.position = "none"
) +
labs(title = "個体Xの見た目データ")
hm2 <- heatmap2 |>
as.data.frame() |>
pivot_longer(everything()) |>
mutate(name2 = paste0("W", sort(rep(1:10, 10)))) |>
ggplot(aes(x = name, y = name2, fill = value)) +
geom_tile() +
scale_fill_gradientn("value", colours = gray.colors(12, start = 0, end = 1, gamma = 2.2, alpha = 1)) +
theme(
aspect.ratio = 1,
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
legend.position = "none"
) +
labs(title = "個体Yの見た目データ")
p3 <- data.frame(x1, x2) |>
ggplot(aes(x = x1, y = x2)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm",formula = y ~ x, se = FALSE, color = "blue") +
labs(x = "個体Xの見た目データ", y = "個体Yの見た目データ") +
coord_fixed() + # アスペクト比を等しく設定
scale_x_continuous(limits = c(0,1)) +
scale_y_continuous(limits = c(0,1))
hm1 + hm2 + p3
library(conflicted)
library(tidyverse)
library(MASS)
n <- 190 # データ点の数
low <- -0.45 # データの範囲の下限
high <- 0.95 # データの範囲の上限
rho <- 0.7 # 相関係数
mean <- c(0, 0) # 平均(二変数)
Sigma <- matrix(c(1, rho, rho, 1), nrow=2) # 共分散行列
set.seed(0)
data <- mvrnorm(n = n-1, mu = mean, Sigma = Sigma) # 多変量正規分布からデータ生成
x <- data[,1]
y <- data[,2]
x <- low + (high - low) * (x - min(x)) / (max(x) - min(x)) # xデータの正規化
y <- low + (high - low) * (y - min(y)) / (max(y) - min(y)) # yデータの正規化
x <- c(x, 0.80) # 特定の点を追加
y <- c(y, 0.72) # 特定の点を追加
df <- data.frame(x = x, y = y)
df |>
ggplot(aes(x = x, y = y)) +
geom_point(alpha = 0.5) + # 散布図の描画
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + # 回帰直線の描画
geom_point(aes(x = 0.80, y = 0.72), colour = "red", size = 2) + # 特定の点を赤色で描画
coord_fixed() + # アスペクト比を等しく設定
annotate("text", x = -0.25, y = 0.75, size = 4,
label=paste0("r = ", round(cor(df$x, df$y), 2))) +
labs(
x = "個体間の見た目類似度スコア",
y = "個体間の行動類似度スコア",
title = "2つの類似度スコアの関係"
)
library(conflicted)
library(tidyverse)
library(patchwork)
# データフレームの作成
df <- data.frame(
Country = c("日本", "ブラジル", "米国", "中国"), # 国
Population = c(124620000, 215802222, 335540000, 1425849288) / 100000000 # 人口
)
# 米国と日本のみを含む棒グラフの描画
p1 <- df |>
dplyr::filter(Country %in% c("米国","日本")) |>
arrange(desc(Population)) |>
ggplot(aes(x = Population, y = reorder(Country, Population), fill = Country)) +
geom_col() +
labs(x = "人口[億人]", y = "国名")
# 四か国を含む棒グラフの描画
p2 <- df |>
arrange(desc(Population)) |>
ggplot(aes(x = Population, y = reorder(Country, Population), fill = Country)) +
geom_col() +
labs(x = "人口[億人]", y = "国名")
p1 / p2
library(conflicted)
library(tidyverse)
data <- data.frame(
x = c(0.204, 1.07, -0.296, 0.57, 0.637, 0.82, 0.137, -0.046),
y = c(0.07, 0.57, 0.936, 1.436, 0.32, 1.003, 1.186, 0.503)
)
square_points <- data.frame(
x = c(-0.296, 0.57, 1.07, 0.204, -0.296),
y = c(0.936, 1.436,0.57,0.07, 0.936)
)
data |>
ggplot(aes(x = x, y = y)) +
geom_point() +
geom_path(square_points, mapping = aes(x = x, y = y), color = "red",
linetype = "dashed") +
coord_fixed() +
labs(title = "視覚情報による提示")
エッジが張られていないノードがネットワーク図で消えてしまう。
kohskeさんのご指摘ですべてのノードを表示できるようになりました!
library(conflicted)
library(tidyverse)
library(igraph)
library(tidygraph)
library(ggraph)
pref_ja <- c(
"北海道", "青森", "岩手", "宮城", "秋田", "山形", "福島", "茨城", "栃木",
"群馬", "埼玉", "千葉", "東京", "神奈川", "新潟", "富山", "石川", "福井",
"山梨", "長野", "岐阜", "静岡", "愛知", "三重", "滋賀", "京都", "大阪",
"兵庫", "奈良", "和歌山", "鳥取", "島根", "岡山", "広島", "山口", "徳島",
"香川", "愛媛", "高知", "福岡", "佐賀", "長崎", "熊本", "大分", "宮崎",
"鹿児島", "沖縄"
)
pref_ja <- factor(pref_ja, levels = pref_ja)
# データをロード
data_df <- read_csv(
"https://raw.githubusercontent.com/tkEzaki/data_visualization/main/1%E7%AB%A0/data/matrix.csv"
)
# 前処理
data_matrix <- data_df[, -1] |>
as.matrix(nrow = 47)
diag(data_matrix) <- 0
data_matrix <- log1p(data_matrix)
rownames(data_matrix) <- colnames(data_matrix) <- pref_ja
# しきい値を計算
threshold <- quantile(data_matrix, .95)
# しきい値をもとにグラフデータを作る
gData <- data_matrix |>
as.data.frame() |>
rownames_to_column(var = "pref") |>
pivot_longer(!pref) |>
rename(
from = pref,
to = name,
weight = value
) # |> dplyr::filter(weight >= threshold)
# グラフオブジェクトを生成
g <- graph_from_data_frame(gData, directed=TRUE)
g <- delete_edges(g, which(edge_attr(g)$weight<threshold))
E(g)$width <- E(g)$weight/5
# E(g)$color <- rainbow(100, alpha = 0.5)[round(E(g)$weight/max(E(g)$weight) * 100, 0)]
E(g)$color <- round(E(g)$weight/max(E(g)$weight) * 100, 0)
V(g)$label.cex <- 0.8
V(g)$size <- 12
# par(mar = c(0,0,0,0))
# g |>
# plot(
# layout = layout_in_circle(g, order = pref_ja),
# edge.curved = 1.1, #0.5,
# edge.arrow.size = 0.5
# )
# tidygraphへ変換
g_tidy <- as_tbl_graph(g)
g_tidy |>
ggraph(layout = "linear", circular = TRUE) +
geom_edge_arc(
aes(width = width, color = color),
arrow = arrow(length = unit(4, "mm"), type = "closed"),
strength = 2,
start_cap = circle(5, "mm"),
end_cap = circle(5, "mm")
) +
scale_edge_color_viridis(option = "turbo", alpha = 0.5) +
geom_node_label(aes(label = name), repel = FALSE) +
theme_void() +
theme(legend.position = "none")
library(conflicted)
library(tidyverse)
library(pheatmap)
library(viridisLite)
pref_ja <- c(
"北海道", "青森", "岩手", "宮城", "秋田", "山形", "福島", "茨城", "栃木",
"群馬", "埼玉", "千葉", "東京", "神奈川", "新潟", "富山", "石川", "福井",
"山梨", "長野", "岐阜", "静岡", "愛知", "三重", "滋賀", "京都", "大阪",
"兵庫", "奈良", "和歌山", "鳥取", "島根", "岡山", "広島", "山口", "徳島",
"香川", "愛媛", "高知", "福岡", "佐賀", "長崎", "熊本", "大分", "宮崎",
"鹿児島", "沖縄"
)
# データをロード
data_df <- read_csv(
"https://raw.githubusercontent.com/tkEzaki/data_visualization/main/1%E7%AB%A0/data/matrix.csv"
)
# 前処理
data_matrix <- data_df[, -1] |>
as.matrix(nrow = 47)
data_matrix <- log1p(data_matrix)
rownames(data_matrix) <- colnames(data_matrix) <- pref_ja
# ヒートマップ
pheatmap(
data_matrix,
clustering_distance_rows = "euclidean",
clustering_method = "ward.D2",
border_color = NA,
color = turbo(50)
)
library(tidyverse)
library(viridis)
library(patchwork)
# データ
data_df <- data.frame(
code = 1:47,
prefectures = c(
"北海道", "青森", "岩手", "宮城", "秋田", "山形", "福島", "茨城", "栃木",
"群馬", "埼玉", "千葉", "東京", "神奈川", "新潟", "富山", "石川", "福井",
"山梨", "長野", "岐阜", "静岡", "愛知", "三重", "滋賀", "京都", "大阪",
"兵庫", "奈良", "和歌山", "鳥取", "島根", "岡山", "広島", "山口", "徳島",
"香川", "愛媛", "高知", "福岡", "佐賀", "長崎", "熊本", "大分", "宮崎",
"鹿児島", "沖縄"
),
shipments = c(
42334, 31897, 56667, 177842, 31295, 39663, 110027, 307752, 173267, 186811,
579061, 299455, 580563, 368026, 83590, 79298, 42741, 53484, 99220, 136407,
190720, 240332, 568222, 159106, 136137, 110115, 432778, 291430, 136335,
35777, 37678, 22612, 108003, 176715, 65910, 29745, 48454, 49767, 20554,
181309, 95824, 34208, 66604, 41907, 37888, 44466, 5240
)/1000 # 桁が大きいので千t単位に調整
)
p1 <- data_df |>
ggplot(aes(x = reorder(prefectures, code), y = shipments, fill = shipments)) +
geom_col() +
labs(x = "", y = "出荷量[千t]", title = "都道府県コード順") +
scale_fill_viridis(option = "turbo") +
theme(
axis.text.x = element_text(angle = 270, hjust = 1),
aspect.ratio = 1/3,
legend.position = "none"
) #縦横比
p2 <- data_df |>
ggplot(aes(x = reorder(prefectures, desc(shipments)), y = shipments, fill = shipments)) +
geom_col() +
labs(x = "", y = "出荷量[千t]", title = "出荷量順") +
scale_fill_viridis(option = "turbo") +
theme(
axis.text.x = element_text(angle = 270, hjust = 1),
aspect.ratio = 1/3,
legend.position = "none"
)
p1 / p2
参考:
library(conflicted)
library(tidyverse)
library(sf)
library(NipponMap)
library(viridisLite)
data_df <- data.frame(
code = 1:47,
prefectures = c(
"北海道", "青森", "岩手", "宮城", "秋田", "山形", "福島", "茨城", "栃木",
"群馬", "埼玉", "千葉", "東京", "神奈川", "新潟", "富山", "石川", "福井",
"山梨", "長野", "岐阜", "静岡", "愛知", "三重", "滋賀", "京都", "大阪",
"兵庫", "奈良", "和歌山", "鳥取", "島根", "岡山", "広島", "山口", "徳島",
"香川", "愛媛", "高知", "福岡", "佐賀", "長崎", "熊本", "大分", "宮崎",
"鹿児島", "沖縄"
),
shipments = c(
42334, 31897, 56667, 177842, 31295, 39663, 110027, 307752, 173267, 186811,
579061, 299455, 580563, 368026, 83590, 79298, 42741, 53484, 99220, 136407,
190720, 240332, 568222, 159106, 136137, 110115, 432778, 291430, 136335,
35777, 37678, 22612, 108003, 176715, 65910, 29745, 48454, 49767, 20554,
181309, 95824, 34208, 66604, 41907, 37888, 44466, 5240
)/1000 # 桁が大きいので千t単位に調整
)
Nippon_map <- read_sf(
system.file("shapes/jpn.shp", package = "NipponMap")[1],
crs = "+proj=longlat +datum=WGS84"
)
Nippon_map$geometry[1] <- Nippon_map$geometry[1] + c(-11, -4)
Nippon_map$geometry[47] <- Nippon_map$geometry[47] + c(12, 5)
ggplot()+
geom_sf(data=Nippon_map, aes(fill = data_df$shipments)) +
scale_fill_viridis(option = "turbo", trans = "log10") +
annotate("segment", x=129, xend=134.2, y=37, yend=37, color="gray", linewidth = 1) +
annotate("segment", x=134.2, xend=138.5, y=37, yend=41, color="gray", linewidth = 1) +
annotate("segment", x=139.8, xend=141, y=32.2, yend=32.2, color="gray", linewidth = 1) +
annotate("segment", x=138.5, xend=139.8, y=31, yend=32.2, color="gray", linewidth = 1) +
theme(aspect.ratio = 1, axis.text = element_blank(),
axis.title = element_blank(), axis.ticks = element_blank()) +
labs(title = "地図で提示", fill="出荷量[千t](対数軸)", x="", y="", caption="Nippomap")
library(conflicted)
library(tidyverse)
library(patchwork)
# データ生成
set.seed(0)
df <- tibble(x1 = runif(50), y1 = runif(50), x2 = runif(50),
y2 = 0.8 * (x2 - 0.5) + 0.5 + 0.1 * rnorm(50))
p1 <- df |>
ggplot(aes(x=x1, y=y1)) +
geom_point(color="#0072B2") +
geom_smooth(method=lm, formula = y ~ x, color="#0072B2") +
labs(x = "変数X", y = "変数Y", title = "相関のない二つの変数") +
coord_fixed()
p2 <- df |>
ggplot(aes(x=x2, y=y2)) +
geom_point(color="#009E73") +
geom_smooth(method=lm, formula = y ~ x, color="#009E73") +
labs(x = "変数X", y = "変数Y", title = "相関のある二つの変数") +
coord_fixed()
p1 + p2
「分布の裾の様子」の図はよく理解できなかったので描いていません。
library(conflicted)
library(tidyverse)
# 平均0、標準偏差1の正規分布から10000個のサンプルを生成
mu <- 0
sigma <- 1
set.seed(0)
p1 <- data.frame(val = rnorm(10000, mean = mu, sd = sigma)) |>
ggplot(aes(x=val)) +
geom_histogram(aes(y=after_stat(density)), bins=30, color = "black", fill = "#999999") +
stat_function(fun = dnorm, args = list(mean=mu, sd=sigma), colour = "red") +
coord_cartesian(ylim = c(0, 0.45)) +
labs(title = "正規分布", x = "", y = "相対度数") +
theme(aspect.ratio = 3/5) #縦横比
# 標準コーシー分布から10000個のサンプルを生成
set.seed(0)
p2 <- data.frame(val = rcauchy(10000)) |>
dplyr::filter(val > -10 & val < 10) |> # 外れ値を捨てる
ggplot(aes(x=val)) +
geom_histogram(aes(y=after_stat(density)), bins=50, color = "black", fill = "#999999") +
stat_function(fun = dcauchy, colour = "blue") +
coord_cartesian(ylim = c(0, 0.45)) +
labs(title = "コーシー分布", x = "", y = "相対度数") +
theme(aspect.ratio = 3/5) #縦横比
p1 + p2
library(conflicted)
library(tidyverse)
library(patchwork)
# データフレームの生成
set.seed(0)
df <- tibble(
x = seq(0, 30, 1),# 0から30までの範囲で等間隔に30個の数を生成
y = exp(0.1 * x) * 100 * (1 + rnorm(31, 0, 0.08)) # 指数関数にランダムなノイズを付加する
)
p1 <- df |>
ggplot(aes(x = x, y = y)) +
geom_point() +
labs(x="経過日数", y="新規感染者数", title="リニアスケールでの表示") +
theme(aspect.ratio = 1) #縦横比
p2 <- df |>
ggplot(aes(x = x, y = y)) +
geom_point() +
scale_y_log10() +
labs(x="経過日数", y="新規感染者数(対数軸)", title="片対数での表示") +
theme(aspect.ratio = 1) #縦横比
p1 + p2
第1章はここまで。