김해김씨99대손 2024. 8. 20. 17:23
# 패키지 불러오기 
library(tidyverse)
library(phyloseq)
library(RColorBrewer)
library(ggplot2)


taxa_plot <- function(melt, taxa, top){
	
 #### Function ####_________________________________________________
 
 # Process phyloseq melt data
 F1.process_data = function(melt , taxa, top) {
    
    # Abundance 순으로 정렬
    Order <- tapply(melt$Abundance, melt[, taxa], sum) %>% 
      sort(decreasing = TRUE)
    
    # Top N 추출 
    Names <- names(Order[1:top])
    
    # Top n 에 속하는 Phylum 추출하기 
    Top_p <- melt[melt[, taxa] %in% Names, "Phylum"] %>% 
      unique()
      
    # Top n 에 속하는 Phylum와 Taxa 를 data frame으로 정리하기 
    p_tax_table <-  melt[ melt[, taxa] %in% Names, c("Phylum", taxa)]  %>%
      .[!duplicated(.[ , taxa]),] 
    
    # Top N 을 제외한 taxa는 모두 Other 로 치환
    processed_data <- melt
    processed_data[!processed_data[, "Phylum"] %in% Top_p, "Phylum" ] <- "Other"
    processed_data[!processed_data[, "Phylum"] %in% Top_p, taxa] <- "Other"
    
    
    # Top phylum에 속하지 않은 taxa는 모두 Other로 변환하기 
    if (taxa != "Species") {
      for (i in Top_p) {
        G <-p_tax_table[p_tax_table[, "Phylum"] == i, taxa]
        
        processed_data[processed_data[, "Phylum"] == i, taxa]
        
        
        
        processed_data[processed_data[, "Phylum"] == i & !processed_data[, taxa] %in% G, taxa] <- paste0(i, "_Other")
      }
      for (i in Top_p) {
        G <- p_tax_table[p_tax_table[, "Phylum"] == i, taxa]
        for (g in G) {
          processed_data[processed_data[, taxa] == g, taxa] <- paste0(i, "_", g)
        }
      }
    } else {
      for (i in Top_p) {
        G <- p_tax_table[p_tax_table[, "Phylum"] == i, taxa]
        processed_data[processed_data[, "Phylum"] == i & !processed_data[, taxa] %in% G, taxa] <- "Other"
        processed_data[processed_data[, "Phylum"] == i & !processed_data[, taxa] %in% G, "Phylum"] <- "Other"
      }
    }
    return(list(df = processed_data, 
                Phylum_list = Top_p))
    
  }
  # 각 taxa 순서별로 정렬하기 
  	# 나중에 시각화 시 legend순서와 동일함 
  F2.Order_data = function(processed_data, Top_p, taxa){
    # phylum order 설정
    table <- processed_data[processed_data[, "Phylum" ] %in% Top_p, c("Abundance", "Phylum", taxa)]
    p_order <- table %>%  .[,"Phylum" ]%>% unique
    processed_data[,"Phylum"]  <- factor(processed_data[,"Phylum" ],  levels = c(sort(p_order), "Other"))
    
    # Abundance와 Phylum 순으로 정렬
    table_2 <- table %>% 
      dplyr::group_by(Phylum, !!rlang::sym(taxa)) %>%
      dplyr::summarise(sum.Abundance=sum(Abundance), .groups = 'drop') %>%
      dplyr::arrange( -sum.Abundance) %>%
      ungroup() %>% 
      as.data.frame()
    
    # 정렬된 데이터에서 원하는 계통 추출 
    g_order <- table_2 %>% 
      dplyr::arrange(Phylum) %>%
      select(all_of(taxa))  %>% .[[1]]
    
    # 정렬된 계통으로 순서 고정 (= factor화 하기) 
    processed_data[ ,taxa] <- factor(processed_data[,taxa], levels = c(g_order, "Other"))
    return(list(df = processed_data, summary_df = table_2))
    
  }
  # taxa개수에 따른 color 분배 
  get_palette_colors = function(palette_name, num_taxa) {
    if (num_taxa == 1) {
      colors <- rev(brewer.pal(9, palette_name)[5])
    } else if (num_taxa == 2) {
      colors <- rev(brewer.pal(9, palette_name)[c(3, 7)])
    } else if (num_taxa >= 3 & num_taxa <= 9) {
      colors <- rev(brewer.pal(num_taxa, palette_name))
    } else {
      color_list <- rev(brewer.pal(9, palette_name))
      colors <- colorRampPalette(color_list)(num_taxa)
    }
    return(colors)
  } 
  # 특정 phylum에 따라 색깔 지정 
  F3.generate_colors = function(processed_data, table_2, taxa) {
    
    ## arrange by abundance and phylum
    table_3 <- table_2 %>%
      dplyr::arrange(-sum.Abundance) %>%
      dplyr::arrange(Phylum) %>%
      dplyr::select(Phylum, !!rlang::sym(taxa))
    
    ## count taxa 
    categories <- aggregate(as.formula(paste(taxa, "Phylum", sep = "~")), 
                            table_3, 
                            function(x) length(unique(x)))
    
    ## First phylum 
    # P_levels <- table_3$Phylum %>% unique
    
    ## color list 
    color_list.names <- categories[, "Phylum"]
    color_list <- vector("list", length(color_list.names))
    names(color_list) <- color_list.names
    
    ## 
    phylum_color_map <- list(
      # Bacteria
      Actinobacteria = "Reds",
      Firmicutes = "Blues",
      Bacteroidetes = "Purples",
      Proteobacteria = "Greens",
      Fusobacteria = "YlOrBr"
      # Fungi 
      
      
    )
    
    basic_p <- names(phylum_color_map)
    other_colors <- c("RdPu", "YlOrBr", "Spectral", "BrBG", "PuOr")
    other_p <- categories$Phylum[!categories$Phylum %in% basic_p]
    other_color_map <- setNames(other_colors[1:length(other_p)], other_p)
    
    phylum_color_map <- c(phylum_color_map, other_color_map)
    
    for (phylum in names(phylum_color_map)) {
      num_taxa <- categories[categories$Phylum == phylum, taxa]
      palette_name <- phylum_color_map[[phylum]]
      
      if (length(num_taxa) > 0) {
        color_list[[phylum]] <- get_palette_colors(palette_name, num_taxa)
      }
    }
    
    
    color_vector <- unlist(color_list, use.names = FALSE)
    final_color <- c(color_vector, "#D3D3D3")
    return(final_color)
  }
  # F4.sampleID_order
  F4.sampleID_order = function(processed_data, taxonomy = Phylum, target = "Actinobacteria", 
                               x_axis = "SampleID") {
    Order <- processed_data %>%
      dplyr::group_by(!!rlang::sym(x_axis)) %>%
      mutate(Abundance = Abundance / sum(Abundance)) %>%
      filter(!!rlang::sym(taxonomy) == target) %>%
      dplyr::group_by(!!rlang::sym(x_axis)) %>%
      dplyr::summarise(Abundance = sum(Abundance)) %>%
      dplyr::arrange(Abundance) %>%
      pull(!!rlang::sym(x_axis)) %>%
      as.character()
    
    processed_data[, x_axis] <- factor(processed_data[, x_axis], levels = Order)
    
    return(processed_data)
  }

  # F5.taxa_plot
  F5.taxa_plot <- function(df, color, taxa, x_axis){
    p <- ggplot(df, aes(x = !!rlang::sym(x_axis), y = Abundance, fill = !!rlang::sym(taxa))) +
      geom_bar(stat = "identity", position="fill") +
      labs(y = "Relative Abundance") +
      theme_classic() +
      theme(axis.text.x = element_text(angle = 45, hjust = 1),  # x축 라벨 각도 조정
            plot.title = element_text(hjust = 0.5),  # 제목 가운데 정렬
            legend.position = "right",  # 범례를 하단에 위치
            legend.title = element_blank()) +  # 범례 제목 제거
      scale_fill_manual(values = color)   # 색상 팔레트 변경
    
    
    print(p)
  }

  
  ####   Run   ####_________________________________________________

  # Topn에 속하지 않는 taxa와 Phylum을 Other 로 정의
  out2 <- F1.process_data(melt, taxa, top)
  
  # Abundance와 Phylum순으로 정렬 
  out3 <- F2.Order_data(out2$df, out2$Phylum_list, taxa)

  # 원하는 색 추출 
  color_code <- F3.generate_colors(out3$df, out3$summary_df, taxa)
  
  # 시각화를 위하여 샘플 정렬 
  out4 <- F4.sampleID_order(out3$df, "Phylum", "Bacteroidetes", x_axis = "Sample")
  
  # 시각화 
  plot <- F5.taxa_plot(out4, color_code, taxa, "Sample")

  return(plot)  
  
  
}

 

 

코드 사용 

################################################################################
# example data

data(GlobalPatterns)
GlobalPatterns

GlobalPatterns.g <- tax_glom(GlobalPatterns, "Genus")
GlobalPatterns.g.rel  <- transform_sample_counts(GlobalPatterns.g, function(x) x/sum(x))

plot <- taxa_plot(psmelt(GlobalPatterns.g.rel), top = 10, taxa = "Genus")
plot
반응형