Programming/R_Package function
taxa_plot()
김해김씨99대손
2024. 8. 20. 17:23
# 패키지 불러오기
library(tidyverse)
library(phyloseq)
library(RColorBrewer)
library(ggplot2)
taxa_plot <- function(melt, taxa, top){
#### Function ####_________________________________________________
# Process phyloseq melt data
F1.process_data = function(melt , taxa, top) {
# Abundance 순으로 정렬
Order <- tapply(melt$Abundance, melt[, taxa], sum) %>%
sort(decreasing = TRUE)
# Top N 추출
Names <- names(Order[1:top])
# Top n 에 속하는 Phylum 추출하기
Top_p <- melt[melt[, taxa] %in% Names, "Phylum"] %>%
unique()
# Top n 에 속하는 Phylum와 Taxa 를 data frame으로 정리하기
p_tax_table <- melt[ melt[, taxa] %in% Names, c("Phylum", taxa)] %>%
.[!duplicated(.[ , taxa]),]
# Top N 을 제외한 taxa는 모두 Other 로 치환
processed_data <- melt
processed_data[!processed_data[, "Phylum"] %in% Top_p, "Phylum" ] <- "Other"
processed_data[!processed_data[, "Phylum"] %in% Top_p, taxa] <- "Other"
# Top phylum에 속하지 않은 taxa는 모두 Other로 변환하기
if (taxa != "Species") {
for (i in Top_p) {
G <-p_tax_table[p_tax_table[, "Phylum"] == i, taxa]
processed_data[processed_data[, "Phylum"] == i, taxa]
processed_data[processed_data[, "Phylum"] == i & !processed_data[, taxa] %in% G, taxa] <- paste0(i, "_Other")
}
for (i in Top_p) {
G <- p_tax_table[p_tax_table[, "Phylum"] == i, taxa]
for (g in G) {
processed_data[processed_data[, taxa] == g, taxa] <- paste0(i, "_", g)
}
}
} else {
for (i in Top_p) {
G <- p_tax_table[p_tax_table[, "Phylum"] == i, taxa]
processed_data[processed_data[, "Phylum"] == i & !processed_data[, taxa] %in% G, taxa] <- "Other"
processed_data[processed_data[, "Phylum"] == i & !processed_data[, taxa] %in% G, "Phylum"] <- "Other"
}
}
return(list(df = processed_data,
Phylum_list = Top_p))
}
# 각 taxa 순서별로 정렬하기
# 나중에 시각화 시 legend순서와 동일함
F2.Order_data = function(processed_data, Top_p, taxa){
# phylum order 설정
table <- processed_data[processed_data[, "Phylum" ] %in% Top_p, c("Abundance", "Phylum", taxa)]
p_order <- table %>% .[,"Phylum" ]%>% unique
processed_data[,"Phylum"] <- factor(processed_data[,"Phylum" ], levels = c(sort(p_order), "Other"))
# Abundance와 Phylum 순으로 정렬
table_2 <- table %>%
dplyr::group_by(Phylum, !!rlang::sym(taxa)) %>%
dplyr::summarise(sum.Abundance=sum(Abundance), .groups = 'drop') %>%
dplyr::arrange( -sum.Abundance) %>%
ungroup() %>%
as.data.frame()
# 정렬된 데이터에서 원하는 계통 추출
g_order <- table_2 %>%
dplyr::arrange(Phylum) %>%
select(all_of(taxa)) %>% .[[1]]
# 정렬된 계통으로 순서 고정 (= factor화 하기)
processed_data[ ,taxa] <- factor(processed_data[,taxa], levels = c(g_order, "Other"))
return(list(df = processed_data, summary_df = table_2))
}
# taxa개수에 따른 color 분배
get_palette_colors = function(palette_name, num_taxa) {
if (num_taxa == 1) {
colors <- rev(brewer.pal(9, palette_name)[5])
} else if (num_taxa == 2) {
colors <- rev(brewer.pal(9, palette_name)[c(3, 7)])
} else if (num_taxa >= 3 & num_taxa <= 9) {
colors <- rev(brewer.pal(num_taxa, palette_name))
} else {
color_list <- rev(brewer.pal(9, palette_name))
colors <- colorRampPalette(color_list)(num_taxa)
}
return(colors)
}
# 특정 phylum에 따라 색깔 지정
F3.generate_colors = function(processed_data, table_2, taxa) {
## arrange by abundance and phylum
table_3 <- table_2 %>%
dplyr::arrange(-sum.Abundance) %>%
dplyr::arrange(Phylum) %>%
dplyr::select(Phylum, !!rlang::sym(taxa))
## count taxa
categories <- aggregate(as.formula(paste(taxa, "Phylum", sep = "~")),
table_3,
function(x) length(unique(x)))
## First phylum
# P_levels <- table_3$Phylum %>% unique
## color list
color_list.names <- categories[, "Phylum"]
color_list <- vector("list", length(color_list.names))
names(color_list) <- color_list.names
##
phylum_color_map <- list(
# Bacteria
Actinobacteria = "Reds",
Firmicutes = "Blues",
Bacteroidetes = "Purples",
Proteobacteria = "Greens",
Fusobacteria = "YlOrBr"
# Fungi
)
basic_p <- names(phylum_color_map)
other_colors <- c("RdPu", "YlOrBr", "Spectral", "BrBG", "PuOr")
other_p <- categories$Phylum[!categories$Phylum %in% basic_p]
other_color_map <- setNames(other_colors[1:length(other_p)], other_p)
phylum_color_map <- c(phylum_color_map, other_color_map)
for (phylum in names(phylum_color_map)) {
num_taxa <- categories[categories$Phylum == phylum, taxa]
palette_name <- phylum_color_map[[phylum]]
if (length(num_taxa) > 0) {
color_list[[phylum]] <- get_palette_colors(palette_name, num_taxa)
}
}
color_vector <- unlist(color_list, use.names = FALSE)
final_color <- c(color_vector, "#D3D3D3")
return(final_color)
}
# F4.sampleID_order
F4.sampleID_order = function(processed_data, taxonomy = Phylum, target = "Actinobacteria",
x_axis = "SampleID") {
Order <- processed_data %>%
dplyr::group_by(!!rlang::sym(x_axis)) %>%
mutate(Abundance = Abundance / sum(Abundance)) %>%
filter(!!rlang::sym(taxonomy) == target) %>%
dplyr::group_by(!!rlang::sym(x_axis)) %>%
dplyr::summarise(Abundance = sum(Abundance)) %>%
dplyr::arrange(Abundance) %>%
pull(!!rlang::sym(x_axis)) %>%
as.character()
processed_data[, x_axis] <- factor(processed_data[, x_axis], levels = Order)
return(processed_data)
}
# F5.taxa_plot
F5.taxa_plot <- function(df, color, taxa, x_axis){
p <- ggplot(df, aes(x = !!rlang::sym(x_axis), y = Abundance, fill = !!rlang::sym(taxa))) +
geom_bar(stat = "identity", position="fill") +
labs(y = "Relative Abundance") +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), # x축 라벨 각도 조정
plot.title = element_text(hjust = 0.5), # 제목 가운데 정렬
legend.position = "right", # 범례를 하단에 위치
legend.title = element_blank()) + # 범례 제목 제거
scale_fill_manual(values = color) # 색상 팔레트 변경
print(p)
}
#### Run ####_________________________________________________
# Topn에 속하지 않는 taxa와 Phylum을 Other 로 정의
out2 <- F1.process_data(melt, taxa, top)
# Abundance와 Phylum순으로 정렬
out3 <- F2.Order_data(out2$df, out2$Phylum_list, taxa)
# 원하는 색 추출
color_code <- F3.generate_colors(out3$df, out3$summary_df, taxa)
# 시각화를 위하여 샘플 정렬
out4 <- F4.sampleID_order(out3$df, "Phylum", "Bacteroidetes", x_axis = "Sample")
# 시각화
plot <- F5.taxa_plot(out4, color_code, taxa, "Sample")
return(plot)
}
코드 사용
################################################################################
# example data
data(GlobalPatterns)
GlobalPatterns
GlobalPatterns.g <- tax_glom(GlobalPatterns, "Genus")
GlobalPatterns.g.rel <- transform_sample_counts(GlobalPatterns.g, function(x) x/sum(x))
plot <- taxa_plot(psmelt(GlobalPatterns.g.rel), top = 10, taxa = "Genus")
plot
반응형