図の右上のshow
ボタンを押すとRのコードが表示されます。
Rでレーダーチャートを描くパッケージはいくつかあるのだが、どれもイマイチ納得いかなかったのでポーラーチャートで勘弁して下さい。
中澤先生から教えていただいたfmsbパッケージを使う方法でキレイに描けました!
fmsbパッケージの日本語マニュアルはこちら。
library(conflicted)
library(tidyverse)
library(fmsb)
data1 <- data.frame(
国語 = c(5, 1, 5, 3),
理科 = c(5, 1, 5, 4),
社会 = c(5, 1, 4, 3),
体育 = c(5, 1, 4, 4),
音楽 = c(5, 1, 5, 2),
数学 = c(5, 1, 5, 2)
)
data2 <- data.frame(
数学 = c(5, 1, 5, 5),
国語 = c(5, 1, 5, 1),
理科 = c(5, 1, 1, 5),
社会 = c(5, 1, 1, 1),
体育 = c(5, 1, 1, 5),
音楽 = c(5, 1, 5, 1)
)
my_radarchart <- function(data){
radarchartcirc(
data, plty = 1, axistype = 4, seg = 4, pty = 32, pcol = c(4, 2),
cglwd = 0.5, cglcol = "gray20", caxislabels=sprintf("%d", 1:5),
pfcol = c(adjustcolor("lightblue", 0.5), adjustcolor("pink", 0.5))
)
text(-0.4, -0.3, "Aさん", col="red")
text(-0.4, 0.6, "Bさん", col="blue")
}
par(mfrow = c(2, 2), oma = c(0,0,0,0), mar = c(0,0,0,0))
# plot1
data1 |>
select(数学, 国語, 理科, 社会, 体育, 音楽) |>
my_radarchart()
# plot2
data1 |>
select(音楽, 数学, 理科, 体育, 社会, 国語) |>
my_radarchart()
# plot3
data2 |>
select(数学, 国語, 理科, 社会, 体育, 音楽) |>
my_radarchart()
# plot4
data2 |>
select(音楽, 数学, 理科, 体育, 社会, 国語) |>
my_radarchart()
library(conflicted)
library(tidyverse)
library(patchwork)
v_names <- c(
"ブドウの品種", "アルコール度数", "リンゴ酸", "ミネラル分",
"ミネラル分のアルカリ度", "マグネシウム", "全フェノール類", "フラバノイド",
"非フラバノイドフェノール類", "プロアントシアニン", "色の強さ", "色相",
"OD280/OD315値", "プロリン"
)
df <- read_csv(
"https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
col_names = v_names,
col_types = "f"
) |>
mutate(across(where(is.numeric), \(x) scale(x))) |> #標準化
rowid_to_column() |>
pivot_longer(!c(ブドウの品種, rowid)) |>
mutate(name = factor(name, levels = v_names))
df |>
ggplot(aes(x = name, y = value, colour = ブドウの品種, group =rowid)) +
geom_line(alpha = 0.5) +
coord_flip() +
labs(title = "ワインの特徴とブドウの品種", y = "相対スコア", x = "")
library(conflicted)
library(tidyverse)
v_names <- c(
"ブドウの品種", "アルコール度数", "リンゴ酸", "ミネラル分",
"ミネラル分のアルカリ度", "マグネシウム", "全フェノール類", "フラバノイド",
"非フラバノイドフェノール類", "プロアントシアニン", "色の強さ", "色相",
"OD280/OD315値", "プロリン"
)
df <- read_csv(
"https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
col_names = v_names,
col_types = "f"
) |>
mutate(across(where(is.numeric), \(x) scale(x))) |> #標準化
rowid_to_column() |>
pivot_longer(!c(ブドウの品種, rowid)) |>
mutate(name = factor(name, levels = v_names))
p1 <- df |>
ggplot(aes(x = name, y = value, group = ブドウの品種, fill = ブドウの品種)) +
stat_summary(geom = "bar", fun = "mean", position = "dodge2") +
stat_summary(geom = "errorbar", fun.data = "mean_se", position = "dodge2") +
labs(y = "相対スコア", x = "") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
aspect.ratio = 1/2,
legend.position = "none")
p2 <- df |>
ggplot(aes(x = name, y = value, fill = ブドウの品種)) +
geom_violin(position="dodge") +
labs(y = "相対スコア", x = "") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
aspect.ratio = 1/2,
legend.position = "none")
p1 / p2
library(conflicted)
library(tidyverse)
library(viridis)
v_names <- c(
"ブドウの品種", "アルコール度数", "リンゴ酸", "ミネラル分",
"ミネラル分のアルカリ度", "マグネシウム", "全フェノール類", "フラバノイド",
"非フラバノイドフェノール類", "プロアントシアニン", "色の強さ", "色相",
"OD280/OD315値", "プロリン"
)
df <- read_csv(
"https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
col_names = v_names,
col_types = "f"
) |>
mutate(across(where(is.numeric), \(x) scale(x))) |> #標準化
rowid_to_column() |>
pivot_longer(!c(rowid, ブドウの品種)) |>
mutate(name = factor(name, levels = rev(v_names)))
df |>
ggplot(aes(x = rowid, y = name, fill = value)) +
geom_tile() +
facet_wrap(vars(ブドウの品種), scales = "free_x") +
scale_fill_viridis(option = "turbo") +
theme(axis.title.y = element_blank()) +
labs(x = "ワイン銘柄番号", title = "カラーコードで値を表現する")
library(conflicted)
library(tidyverse)
df <- read_csv(
"https://raw.githubusercontent.com/tkEzaki/data_visualization/main/4%E7%AB%A0/data/behavior_data.csv"
) |>
mutate(Time = Time / 3600) |> # Time列を秒から時間に変換
pivot_longer(!Time) |>
mutate(
value = case_when(
value == "Garbage" ~ "ゴミ捨て場",
value == "Nest" ~ "寝室",
value == "Other" ~ "一般の部屋",
value == "Toilet" ~ "トイレ"
) |>
factor(levels = c("ゴミ捨て場", "トイレ", "寝室", "一般の部屋"))
)
df |>
ggplot(aes(x =name , y = Time, fill = value)) +
geom_tile() +
scale_y_reverse() +
scale_fill_manual(values = c("#E69F00", "#56B4E9", "#009E73", "#F0E442")) +
labs(title = "ヒートマップによる行動時系列の可視化", x = "個体", y = "時間[h]")
library(conflicted)
library(tidyverse)
library(patchwork)
# CSVデータを読み込む
df <- read_csv(
"https://raw.githubusercontent.com/tkEzaki/data_visualization/main/4%E7%AB%A0/data/behavior_data.csv"
) |>
mutate(Time = Time / 3600) |> # Time列を秒から時間に変換
pivot_longer(!Time) |>
mutate(
value = case_when(
value == "Garbage" ~ "ゴミ捨て場",
value == "Nest" ~ "寝室",
value == "Other" ~ "一般の部屋",
value == "Toilet" ~ "トイレ"
) |>
factor(levels = rev(c("ゴミ捨て場", "トイレ", "寝室", "一般の部屋")))
)
# 1個体(J)のデータ
df_j <- df |>
dplyr::filter(name == "J") |>
dplyr::select(!name) |>
mutate(action = "1")
p1 <- df_j |>
ggplot(aes(x = value, y = Time, fill = value)) +
geom_tile() +
scale_y_reverse() +
labs(x = "", y = "時間[h]") +
scale_fill_brewer(palette = "Set1") +
theme(
legend.position="none",
aspect.ratio = 4 / 1.5
)
p2 <- df_j |>
dplyr::filter(Time >= 13, Time <= 16) |>
ggplot(aes(x = value, y = Time, fill = value)) +
geom_tile() +
scale_y_reverse() +
labs(x = "", y = "時間[h]") +
scale_fill_manual(values = c("#E69F00", "#56B4E9", "#009E73", "#F0E442")) +
theme(
legend.position="none",
aspect.ratio = 4 / 1.5
)
p1 + p2
library(conflicted)
library(tidyverse)
library(patchwork)
library(viridis)
# 共著データ
researchers <- c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J")
collaboration <- matrix(
c(
NA, 1, 1, 0, 0, 0, 0, 0, 0, 1,
1, NA, 0, 1, 1, 1, 0, 0, 0, 1,
1, 0, NA, 0, 0, 0, 1, 1, 1, 0,
0, 1, 0, NA, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, NA, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, NA, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, NA, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, NA, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, NA, 0,
1, 1, 0, 0, 0, 0, 0, 0, 0, NA
),
nrow=10,
dimnames = list(researchers, researchers)
)
p1 <- collaboration |>
as.data.frame() |>
rownames_to_column() |>
pivot_longer(!rowname) |>
mutate(
value = factor(value),
name = factor(name, levels = rev(LETTERS[1:10]))
) |>
ggplot(aes(x = rowname, y = name, fill = value, label = value)) +
geom_tile() +
geom_text() +
scale_fill_hue(name = "", labels = c("0" = "共著なし", "1" ="共著あり")) +
labs(title = "共著関係の有無", x = "", y = "") +
theme(aspect.ratio = 1)
# ここはもうちょっとスマートに書けないか?
set.seed(0)
v0 <- runif(n = (100 -10)/2, min = 0.0, max = 0.5) |> round(2)
v1 <- runif(n = (100 -10)/2, min = 0.5, max = 1.0) |> round(2)
collaboration_score <- collaboration
collaboration_score[upper.tri(collaboration_score)] <-
if_else(collaboration[lower.tri(collaboration)]== 1, v1, v0)
collaboration_score <- t(collaboration_score)
collaboration_score[upper.tri(collaboration_score)] <-
if_else(collaboration[lower.tri(collaboration)]== 1, v1, v0)
p2 <- collaboration_score |>
as.data.frame() |>
rownames_to_column() |>
pivot_longer(!rowname) |>
mutate(name = factor(name, levels = rev(LETTERS[1:10]))) |>
ggplot(aes(x = rowname, y = name, fill = value, label = value)) +
geom_tile() +
geom_text(size = 2) +
labs(title = "研究の興味の類似度", x = "", y = "") +
theme(aspect.ratio = 1) +
scale_fill_viridis(option = "turbo")
p1 + p2
igraphで書いていたものをggraphで書き直しました。
library(conflicted)
library(tidyverse)
library(igraph)
library(tidygraph)
library(ggraph)
library(patchwork)
# 共著データ
researchers <- c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J")
collaboration <- matrix(
c(
NA, 1, 1, 0, 0, 0, 0, 0, 0, 1,
1, NA, 0, 1, 1, 1, 0, 0, 0, 1,
1, 0, NA, 0, 0, 0, 1, 1, 1, 0,
0, 1, 0, NA, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, NA, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, NA, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, NA, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, NA, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, NA, 0,
1, 1, 0, 0, 0, 0, 0, 0, 0, NA
),
nrow=10,
dimnames = list(researchers, researchers)
)
# 研究の興味の類似度
set.seed(0)
v0 <- runif(n = (100 -10)/2, min = 0.0, max = 0.5) |> round(2)
v1 <- runif(n = (100 -10)/2, min = 0.5, max = 1.0) |> round(2)
collaboration_score <- collaboration
collaboration_score[upper.tri(collaboration_score)] <-
if_else(collaboration[lower.tri(collaboration)]== 1, v1, v0)
collaboration_score <-
t(collaboration_score)
collaboration_score[upper.tri(collaboration_score)] <-
if_else(collaboration[lower.tri(collaboration)]== 1, v1, v0)
# 共著ネットワーク
diag(collaboration) <- 0
collabo_g <- collaboration |>
graph_from_adjacency_matrix(mode = "undirected")
set.seed(0)
# tidygraphへ変換
collabo_g_tidy <- as_tbl_graph(collabo_g, directed = FALSE)
# 共著関係の有無
p1 <- collabo_g_tidy |>
ggraph(layout = "linear", circular = TRUE) +
geom_edge_link() +
geom_node_label(aes(label = name), repel = FALSE) +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "共著関係の有無、円状レイアウト")
p2 <- collabo_g_tidy |>
ggraph(layout = "fr") +
geom_edge_link() +
geom_node_label(aes(label = name), repel = FALSE) +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "共著関係の有無、スプリングレイアウト")
# 類似度ネットワーク
diag(collaboration_score) <- 0
score_g <- collaboration_score |>
graph_from_adjacency_matrix(mode = "undirected", weighted = TRUE)
E(score_g)$color <- round((E(score_g)$weight / max(E(score_g)$weight)) * 100, 0)
E(score_g)$weight
## [1] 0.76 0.89 0.29 0.45 0.54 0.38 0.63 0.19 0.33 0.19 0.45 0.47 0.55 0.96 0.74
## [16] 0.43 0.40 0.10 0.72 0.66 0.36 0.33 0.17 0.05 0.31 0.34 0.50 0.06 0.24 0.36
## [31] 0.19 0.19 0.13 0.30 0.21 0.83 0.19 0.25 0.41 0.01 0.09 0.32 0.41 0.39 0.28
# tidygraphへ変換
score_g_tidy <- as_tbl_graph(score_g, directed = FALSE)
p3 <- score_g_tidy |>
ggraph(layout = "linear", circular = TRUE) +
geom_edge_link(aes(width = color, color = color)) +
scale_edge_color_viridis(option = "turbo", alpha = 0.5) +
geom_node_label(aes(label = name), repel = FALSE) +
theme_void() +
theme(aspect.ratio = 1, legend.position = "none") +
labs(title = "研究の興味の類似度、円状レイアウト")
p4 <- score_g_tidy |>
ggraph(layout = "fr") +
geom_edge_link(aes(width = color, color = color)) +
scale_edge_color_viridis(option = "turbo", alpha = 0.5) +
geom_node_label(aes(label = name), repel = FALSE) +
theme_void() +
theme(aspect.ratio = 1, legend.position = "none") +
labs(title = "研究の興味の類似度、スプリングレイアウト")
{p1|p2}/{p3|p4}
library(conflicted)
library(tidyverse)
library(igraph)
library(tidygraph)
library(ggraph)
library(patchwork)
# 研究者リスト
researchers <- c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J")
# 共著データ
collaboration <- matrix(
c(
NA, 1, 1, 0, 0, 0, 0, 0, 0, 1,
0, NA, 0, 1, 1, 1, 0, 0, 0, 0,
0, 0, NA, 0, 0, 0, 1, 1, 1, 0,
0, 0, 0, NA, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, NA, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, NA, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, NA, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, NA, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, NA, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, NA
),
nrow = 10,
dimnames = list(researchers, researchers)
)
# 共著ネットワーク
p1 <- collaboration |>
as.data.frame() |>
rownames_to_column() |>
pivot_longer(!rowname) |>
mutate(
value = factor(value),
name = factor(name, levels = rev(LETTERS[1:10]))
) |>
ggplot(aes(x = rowname, y = name, fill = value, label = value)) +
geom_tile() +
geom_text() +
scale_fill_hue(name = "", labels = c("0" = "指導関係なし", "1" ="指導関係あり")) +
labs(title = "隣接行列表示", x = "指導された研究者", y = "指導した研究者") +
theme(aspect.ratio = 1)
diag(collaboration) <- 0
# tidygraphへ変換
collaboration_tidy <- as_tbl_graph(t(collaboration), directed = TRUE)
p2 <- collaboration_tidy |>
ggraph(layout = "tree") +
geom_edge_link(
arrow = arrow(type = "closed", length = unit(4, 'mm')),
start_cap = circle(4, 'mm'),
end_cap = circle(4, 'mm')
) +
geom_node_label(aes(label = name), repel = FALSE) +
theme_void() +
theme(aspect.ratio = 1, legend.position = "none") +
labs(title = "階層レイアウトによるネットワーク表示")
p1 + p2
circoレイアウトがigraphには無いようなのでFruchterman and Reingoldレイアウトで代用。
library(conflicted)
library(tidyverse)
library(igraph)
library(tidygraph)
library(ggraph)
library(patchwork)
set.seed(0)
G_er <- erdos.renyi.game(30, 0.2)
G_ws <- watts.strogatz.game(1, 30, 5, 0.1)
G_ba <- barabasi.game(30, 1)
ErdosR <- as_adjacency_matrix(G_er, sparse = FALSE)
WattsStrogatz <- as_adjacency_matrix(G_ws, sparse = FALSE)
BarabasiAlbert <- as_adjacency_matrix(G_ba, sparse = FALSE)
net1 <- graph_from_adjacency_matrix(ErdosR, mode = "max")
net2 <- graph_from_adjacency_matrix(WattsStrogatz, mode = "max")
net3 <- graph_from_adjacency_matrix(BarabasiAlbert, mode = "max")
# tidygraphへ変換
net1_tidy <- as_tbl_graph(net1, directed = FALSE)
net2_tidy <- as_tbl_graph(net2, directed = FALSE)
net3_tidy <- as_tbl_graph(net3, directed = FALSE)
p1 <- net1_tidy |>
ggraph(layout = "linear", circular = TRUE) +
geom_edge_link(color = "#666666", alpha = 0.5) +
geom_node_point(size = 2, color = "#F8766D") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Erdos-Reyni", subtitle = "Circular Layout")
p2 <- net2_tidy |>
ggraph(layout = "linear", circular = TRUE) +
geom_edge_link(color = "#666666", alpha = 0.5) +
geom_node_point(size = 2, color = "#F8766D") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Watts-Strogatz", subtitle = "Circular Layout")
p3 <- net3_tidy |>
ggraph(layout = "linear", circular = TRUE) +
geom_edge_link(color = "#666666", alpha = 0.5) +
geom_node_point(size = 2, color = "#F8766D") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Barabasi-Albert", subtitle = "Circular Layout")
p4 <- net1_tidy |>
ggraph(layout = "igraph", algorithm ="fr") +
geom_edge_link(color = "#666666", alpha = 0.5) +
geom_node_point(size = 2, color = "#B79F00") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Erdos-Reyni", subtitle = "FR Layout")
p5 <- net2_tidy |>
ggraph(layout = "igraph", algorithm ="fr") +
geom_edge_link(color = "#666666", alpha = 0.5) +
geom_node_point(size = 2, color = "#B79F00") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Watts-Strogatz", subtitle = "FR Layout")
p6 <- net3_tidy |>
ggraph(layout = "igraph", algorithm ="fr") +
geom_edge_link(color = "#666666", alpha = 0.5) +
geom_node_point(size = 2, color = "#B79F00") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Barabasi-Albert", subtitle = "FR Layout")
p7 <- net1_tidy |>
ggraph(layout = "igraph", algorithm ="kk") +
geom_edge_link(color = "#666666", alpha = 0.5) +
geom_node_point(size = 2, color = "#00BA38") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Erdos-Reyni", subtitle = "KK Layout")
p8 <- net2_tidy |>
ggraph(layout = "igraph", algorithm ="kk") +
geom_edge_link(color = "#666666", alpha = 0.5) +
geom_node_point(size = 2, color = "#00BA38") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Watts-Strogatz", subtitle = "KK Layout")
p9 <- net3_tidy |>
ggraph(layout = "igraph", algorithm ="kk") +
geom_edge_link(color = "#666666", alpha = 0.5) +
geom_node_point(size = 2, color = "#00BA38") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Barabasi-Albert", subtitle = "KK Layout")
{p1|p2|p3}/{p4|p5|p6}/{p7|p8|p9}
library(conflicted)
library(tidyverse)
library(igraph)
library(tidygraph)
library(ggraph)
# ランダム有向グラフを生成
set.seed(0)
G_dir_random <- random.graph.game(n=30, p=0.1, directed = TRUE)
# 階層構造を持つ有向グラフを生成
set.seed(0)
G_dir_hierarchy <- sample_tree(30, directed = TRUE)
# tidygraphへ変換
G_dir_random_tidy <- as_tbl_graph(G_dir_random, directed = TRUE)
G_dir_hierarchy_tidy <- as_tbl_graph(G_dir_hierarchy, directed = TRUE)
p1 <- G_dir_random_tidy |>
ggraph(layout = "igraph", algorithm = "tree") +
geom_edge_link(color = "#666666", alpha = 0.5, arrow = arrow(type = "closed", length = unit(4, 'mm'))) +
geom_node_point(size = 2, color = "#00BFC4") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Directed Random Graph", subtitle = "Tree Layout")
p2 <- G_dir_hierarchy_tidy |>
ggraph(layout = "igraph", algorithm = "tree") +
geom_edge_link(color = "#666666", alpha = 0.5, arrow = arrow(type = "closed", length = unit(4, 'mm'))) +
geom_node_point(size = 2, color = "#00BFC4") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Directed Graph with Wide Hierarchy", subtitle = "Tree Layout")
p3 <- G_dir_random_tidy |>
ggraph(layout = "igraph", algorithm = "fr") +
geom_edge_link(color = "#666666", alpha = 0.5, arrow = arrow(type = "closed", length = unit(4, 'mm'))) +
geom_node_point(size = 2, color = "#619CFF") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Directed Random Graph", subtitle = "FR Layout")
p4 <- G_dir_hierarchy_tidy |>
ggraph(layout = "igraph", algorithm = "fr") +
geom_edge_link(color = "#666666", alpha = 0.5, arrow = arrow(type = "closed", length = unit(4, 'mm'))) +
geom_node_point(size = 2, color = "#619CFF") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Directed Graph with Wide Hierarchy", subtitle = "FR Layout")
p5 <- G_dir_random_tidy |>
ggraph(layout = "igraph", algorithm = "kk") +
geom_edge_link(color = "#666666", alpha = 0.5, arrow = arrow(type = "closed", length = unit(4, 'mm'))) +
geom_node_point(size = 2, color = "#F564E3") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Directed Random Graph", subtitle = "KK Layout")
p6 <- G_dir_hierarchy_tidy |>
ggraph(layout = "igraph", algorithm = "kk") +
geom_edge_link(color = "#666666", alpha = 0.5, arrow = arrow(type = "closed", length = unit(4, 'mm'))) +
geom_node_point(size = 2, color = "#F564E3") +
theme_void() +
theme(aspect.ratio = 1) +
labs(title = "Directed Graph with Wide Hierarchy", subtitle = "KK Layout")
{p1|p2}/{p3|p4}/{p5|p6}
library(conflicted)
library(tidyverse)
library(pheatmap)
library(viridisLite)
df <- read_csv(
"https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
col_names = c(
"ブドウの品種", "アルコール度数", "リンゴ酸", "ミネラル分",
"ミネラル分のアルカリ度", "マグネシウム", "全フェノール類", "フラバノイド",
"非フラバノイドフェノール類", "プロアントシアニン", "色の強さ", "色相",
"OD280/OD315値", "プロリン"
),
col_types = "f"
) |>
mutate(across(where(is.numeric), \(x) scale(x))) #標準化
pheatmap(
df |>
dplyr::select(!ブドウの品種) |>
t(),
color = turbo(50),
clustering_method = "ward.D2"
)
BIRCHだけ見つからないので省略。
ちなみにRではmlbenchパッケージにベンチマーク用の様々なデータセットと人工データ生成用の関数が用意されています。
library(conflicted)
library(tidyverse)
library(ClusterR) # MiniBatch KMeans
library(apcluster) # Affinity Propagation
library(meanShiftR) # MeanShift
library(skmeans) # Spectral Clustering
library(cluster) # Agglomerative Clustering
library(dbscan) # DBSCAN, HDBSCAN, OPTICS,
library(mclust) # Gaussian Mixture
library(patchwork)
# データ用意
my_read_csv <- function(file){
read_csv(file, col_names = c("x", "y", "class")) |>
mutate( #標準化しておく
x = (x - mean(x))/sd(x),
y = (y - mean(x))/sd(x),
class = factor(class + 1))
}
# 円形のクラスタ
noisy_circles <- my_read_csv(
"https://raw.githubusercontent.com/morimotoosamu/data_visualization/main/data/noisy_circles.csv"
)
# 月型のクラスタ
noisy_moons <- my_read_csv(
"https://raw.githubusercontent.com/morimotoosamu/data_visualization/main/data/noisy_moons.csv"
)
# 正規分布に従うクラスタ
blobs <- my_read_csv(
"https://raw.githubusercontent.com/morimotoosamu/data_visualization/main/data/blobs.csv"
)
# 異方性のあるデータ
aniso <- my_read_csv(
"https://raw.githubusercontent.com/morimotoosamu/data_visualization/main/data/aniso.csv"
)
# 3つの正規分布に従うデータ
varied <- my_read_csv(
"https://raw.githubusercontent.com/morimotoosamu/data_visualization/main/data/varied.csv"
)
n <- 500
set.seed(0)
no_structure <- data.frame(x = runif(n), y = runif(n), class = factor("0"))# 構造のないデータ
# 各手法のラッパー関数を用意
# MiniBatch KMeans
my_MiniBatchKmeans <- function(data, cluster_num) {
data_cleaned <- data[, 1:2]
start <- Sys.time()
fit <- MiniBatchKmeans(data_cleaned, clusters = cluster_num)
pred <- predict(fit, data_cleaned)
end <- Sys.time()
diff <- end - start
data$pred <- as.factor(pred)
return(list(res = data, diff = diff))
}
# Affinity Propagation
my_apcluster <- function(data, cluster_num) {
data_cleaned <- data[, 1:2]
start <- Sys.time()
pred <- apcluster(s = negDistMat(r=2), x = data_cleaned, p = -200, q = 0.9) |>
cutree(cluster_num) |>
labels(type = "enum")
end <- Sys.time()
diff <- end - start
data$pred <- as.factor(pred)
return(list(res = data, diff = diff))
}
# MeanShift
my_meanShift <- function(data) {
data_cleaned <- as.matrix(data[, 1:2])
start <- Sys.time()
pred <- meanShift(data_cleaned, data_cleaned)$assignment
end <- Sys.time()
diff <- end - start
data$pred <- as.factor(pred)
return(list(res = data, diff = diff))
}
# Spectral Clustering
my_skmeans <- function(data, cluster_num) {
data_cleaned <- base::as.matrix(data[, 1:2])
start <- Sys.time()
pred <- skmeans(data_cleaned, k = cluster_num)$cluster
end <- Sys.time()
diff <- end - start
data$pred <- as.factor(pred)
return(list(res = data, diff = diff))
}
# Ward
my_hclust <- function(data, cluster_num) {
data_cleaned <- data[, 1:2]
start <- Sys.time()
pred <- data_cleaned |>
stats::dist() |>
hclust(method = "ward.D2") |>
cutree(k = cluster_num)
end <- Sys.time()
diff <- end - start
data$pred <- as.factor(pred)
return(list(res = data, diff = diff))
}
# Agglomerative Clustering
my_agnes <- function(data, cluster_num) {
data_cleaned <- data[, 1:2]
start <- Sys.time()
pred <- agnes(data_cleaned) |> cutree(k = cluster_num)
end <- Sys.time()
diff <- end - start
data$pred <- as.factor(pred)
return(list(res = data, diff = diff))
}
# DBSCAN
my_dbscan <- function(data) {
data_cleaned <- data[, 1:2]
start <- Sys.time()
pred <- dbscan::dbscan(data_cleaned, eps = 0.3)$cluster
end <- Sys.time()
diff <- end - start
data$pred <- as.factor(pred)
return(list(res = data, diff = diff))
}
# HDBSCAN
my_hdbscan <- function(data) {
data_cleaned <- data[, 1:2]
start <- Sys.time()
pred <- dbscan::hdbscan(data_cleaned, minPts = 15)$cluster
end <- Sys.time()
diff <- end - start
data$pred <- as.factor(pred)
return(list(res = data, diff = diff))
}
# OPTICS
my_optics <- function(data) {
data_cleaned <- data[, 1:2]
start <- Sys.time()
pred <- dbscan::optics(data_cleaned, eps = 0.1, minPts = 7) |>
extractXi(xi = 0.05) |>
pluck("cluster")
end <- Sys.time()
diff <- end - start
data$pred <- as.factor(pred)
return(list(res = data, diff = diff))
}
# Gaussian Mixture
my_Mclust <- function(data) {
data_cleaned <- data[, 1:2]
start <- Sys.time()
pred <- mclust::Mclust(data_cleaned, G = 1:3)$classification
end <- Sys.time()
diff <- end - start
data$pred <- as.factor(pred)
return(list(res = data, diff = diff))
}
### クラスタリング実行
dats <- list(noisy_circles, noisy_moons, varied, aniso, blobs, no_structure)
cnums <- list(2,2,3,3,3,3)
set.seed(0)
res_clustering <- list(
res_MiniBatchKmeans = map2(dats, cnums, \(dats, cnums) my_MiniBatchKmeans(dats, cnums)),
res_apcluster = map2(dats, cnums, \(dats, cnums) my_apcluster(dats, cnums)),
res_meanShift = purrr::map(dats, \(dats) my_meanShift(dats)),
res_skmeans = map2(dats, cnums, \(dats, cnums) my_skmeans(dats, cnums)),
res_hclust = map2(dats, cnums, \(dats, cnums) my_hclust(dats, cnums)),
res_agnes = map2(dats, cnums, \(dats, cnums) my_agnes(dats, cnums)),
res_dbscan = purrr::map(dats, \(dats) my_dbscan(dats)),
res_hdbscan = purrr::map(dats, \(dats) my_hdbscan(dats)),
res_optics = purrr::map(dats, \(dats) my_optics(dats)),
res_Mclust = purrr::map(dats, \(dats) my_Mclust(dats))
)
# 描画用ラッパー関数を用意
my_plot <- function(result) {
result$res |>
ggplot(aes(x = x, y = y, color = pred)) +
geom_point(size = 1) +
labs(caption = paste0(round(result$diff,3),"s")) +
theme(
axis.ticks = element_blank(), # tickの線を消す
axis.text = element_blank(), # tickの数字を消す
axis.title = element_blank(), # 軸のラベルを消す
axis.line = element_blank(), # 軸の線を消す
legend.position="none",
aspect.ratio = 1
)
}
nums <- expand_grid(d = 1:6, m = 1:10)
res_plot <- map2(nums$m, nums$d, \(.x, .y) my_plot(res_clustering[[.x]][[.y]]))
# ここがダサい……もうちょっとどうにかならないか
res_plot[[1]] + res_plot[[2]] +res_plot[[3]] +res_plot[[4]] +res_plot[[5]] +res_plot[[6]] +
res_plot[[7]] + res_plot[[8]] +res_plot[[9]] +res_plot[[10]] +res_plot[[11]] +res_plot[[12]] +
res_plot[[13]] + res_plot[[14]] +res_plot[[15]] +res_plot[[16]] +res_plot[[17]] +res_plot[[18]] +
res_plot[[19]] + res_plot[[20]] +res_plot[[21]] +res_plot[[22]] +res_plot[[23]] +res_plot[[24]] +
res_plot[[25]] + res_plot[[26]] +res_plot[[27]] +res_plot[[28]] +res_plot[[29]] +res_plot[[30]] +
res_plot[[31]] + res_plot[[32]] +res_plot[[33]] +res_plot[[34]] +res_plot[[35]] +res_plot[[36]] +
res_plot[[37]] + res_plot[[38]] +res_plot[[39]] +res_plot[[40]] +res_plot[[41]] +res_plot[[42]] +
res_plot[[43]] + res_plot[[44]] +res_plot[[45]] +res_plot[[46]] +res_plot[[47]] +res_plot[[48]] +
res_plot[[49]] + res_plot[[50]] +res_plot[[51]] +res_plot[[52]] +res_plot[[53]] +res_plot[[54]] +
res_plot[[55]] + res_plot[[56]] +res_plot[[57]] +res_plot[[58]] +res_plot[[59]] +res_plot[[60]] +
plot_layout(ncol = 10)
library(conflicted)
library(tidyverse)
library(skmeans)
library(GGally)
library(patchwork)
df <- read_csv(
"https://raw.githubusercontent.com/morimotoosamu/data_visualization/main/data/df_434.csv"
)
# 主成分分析(標準化)
pca_result <- prcomp(df)#, scale. = TRUE)
pca_importance <- as.data.frame(summary(pca_result)$importance[2,]) |>
rownames_to_column()
names(pca_importance) <- c("pc", "value")
# Spectral Clustering
clusters_pca <- skmeans(pca_result$x[, 1:2], 3)
p1 <- data.frame(
pc1 = pca_result$x[, 1],
pc2 = pca_result$x[, 2],
class = factor(clusters_pca$cluster)
) |>
ggplot(aes(x = pc1, y = pc2, color = class)) +
geom_point() +
labs(title = "二つの主成分で見る")+
theme(legend.position="none", aspect.ratio = 1)
p2 <- pca_importance |>
ggplot(aes(x = reorder(pc, desc(value)), y = value)) +
geom_col()+
labs(title = "各主成分のデータ悦明力", x = "", y = "")+
theme(aspect.ratio = 1)
p1 + p2
library(conflicted)
library(tidyverse)
library(MASS) # MDS(sammon)
library(Rtsne) # t-SNE
library(umap) # UMAP
df <- read_csv(
"https://raw.githubusercontent.com/morimotoosamu/data_visualization/main/data/digits.csv"
)
df_cleaned <- df |>
dplyr::select(!target)
# 各手法で2次元に圧縮
# 主成分分析
X_pca <- prcomp(df_cleaned)$x[, 1:2]
# t-SNE
set.seed(42)
X_tsne <- Rtsne(df_cleaned, num_threads = 2)$Y
# MDS
X_mds <- df_cleaned |>
dist() |>
sammon(trace = FALSE) |>
pluck("points")
# UMAP
set.seed(42)
X_umap <- umap(df_cleaned)$layout
# K-means
set.seed(42)
clusters_pca <- kmeans(X_pca, 10)$cluster # PCA
set.seed(42)
clusters_mds <- kmeans(X_mds, 10)$cluster #MDS
set.seed(42)
clusters_tsne <- kmeans(X_tsne, 10)$cluster # t-SNE
set.seed(42)
clusters_umap <- kmeans(X_umap, 10)$cluster # UMAP
# 結果をデータフレームにまとめる
df_base <- data.frame(
x = c(X_pca[, 1], X_mds[, 1], X_tsne[, 1], X_umap[, 1]),
y = c(X_pca[, 2], X_mds[, 2], X_tsne[, 2], X_umap[, 2])
)
n <- 1797
dimred <- bind_rows(
df_base |>
mutate(
label = rep(df$target, 4),
method = c(rep("pca_label", n), rep("mds_label", n),rep("tsne_label", n),rep("umap_label", n))
),
df_base |>
mutate(
label = c(clusters_pca, clusters_mds, clusters_tsne, clusters_umap),
method = c(rep("pca_kmeans", n), rep("mds_kmeans", n),rep("tsne_kmeans", n),rep("umap_kmeans", n))
)
) |>
mutate(
method = factor(
method,
levels = c("pca_kmeans", "pca_label", "mds_kmeans", "mds_label",
"tsne_kmeans", "tsne_label", "umap_kmeans", "umap_label")),
label = factor(label)
)
# クラスタリング結果と正解ラベルの描画
dimred |>
ggplot(aes(x = x, y = y, color = label)) +
geom_point() +
labs(title="様々な次元圧縮方法") +
theme(
axis.ticks = element_blank(), # tickの線を消す
axis.text = element_blank(), # tickの数字を消す
axis.title = element_blank(), # 軸のラベルを消す
axis.line = element_blank(), # 軸の線を消す
legend.position="none",
aspect.ratio = 1
) +
facet_wrap(vars(method), nrow = 2, scales = "free")
第4章はここまで。