Information

Version: 1.0.1
Title: Phylogenetic Tree-based Microbiome Association TEST (TMAT)
R package Dependancies: ggplot2 raster stringr ape grid edgeR parallel
URL: http://healthstat.snu.ac.kr/software/tmat/
Maintainer: Kangjin Kim <kangjin.kim1109@gmail.com>


Usage

Obtain TMAT p-value and plot

TMAT(Data_Read,Pheno,Tree,Taxonomy_Input,Response_Type="categorical",ncore=1,db="silva",TestType="Chi-squared",TMAT_type="M",fdr_cutoff=0.05,plot=T)

Output

TMAT returns an object of class list containing the following compoenents.


Example

source("https://raw.githubusercontent.com/viva1109/TMAT/main/R/TMAT.R",encoding = "UTF-8")
data_d<-"https://raw.githubusercontent.com/viva1109/TMAT/main/"

#Choose a database choose either "silva" or "ez".
db<-"silva"
print(db)
## [1] "silva"
if(str_detect(db,"ez")){
  tt_d<-"/data/otu_comp_new_ez.txt"
  tt_tree<-"/tree/ez_sina_fastmp.tre"
}
if(str_detect(db,"silva")){
  tt_d<-"/data/otu_comp_new_silva.txt"
  tt_tree<-"/tree/97_otus.tre"
}
data_list_nm<-paste0(data_d,tt_d)
phenofile<-paste0(data_d,"/data/pheno_togo.csv")
file_tree<-paste0(data_d,tt_tree)

#Check the file paths according to the chosen database
print(c(data_list_nm,file_tree,phenofile))
## [1] "https://raw.githubusercontent.com/viva1109/TMAT/main/data/otu_comp_new_silva.txt"
## [2] "https://raw.githubusercontent.com/viva1109/TMAT/main/tree/97_otus.tre"           
## [3] "https://raw.githubusercontent.com/viva1109/TMAT/main/data/pheno_togo.csv"
#Metagenome file.
data_read<-read.delim(data_list_nm,stringsAsFactors=F,check.names = F)
data_input<-data_read[,-dim(data_read)[2]]
taxonomy_full<-data_read[,dim(data_read)[2]]
data_input[1:6,1:6]
##                X.OTU.ID ERR475467 ERR475468 ERR475469 ERR475470 ERR475471
## 1   AAAA02020713.1.1297         1         3         0         1         0
## 2  AACY023846778.1.1273         0         0         0         0         0
## 3   AAQL01001287.5.1442         0         1         0         0         0
## 4   AATC01000018.2.1513         0         0         0         0         0
## 5 AATE01000163.205.1482         0         0         0         0         0
## 6 AATF01000761.367.1874         0         0         0         0         0
#This taxonomy information corresponds to the rows of data_input.
head(taxonomy_full)
## [1] "D_0__Bacteria;D_1__Proteobacteria;D_2__Gammaproteobacteria;D_3__Enterobacteriales;D_4__Enterobacteriaceae;D_5__Escherichia-Shigella;D_6__Oryza sativa Indica Group (long-grained rice)"
## [2] "D_0__Bacteria;D_1__Proteobacteria;D_2__Alphaproteobacteria;D_3__Rhodobacterales;D_4__Rhodobacteraceae;D_5__uncultured;D_6__marine metagenome"                                          
## [3] "D_0__Bacteria;D_1__Firmicutes;D_2__Clostridia;D_3__Clostridiales;D_4__Christensenellaceae;D_5__Christensenellaceae R-7 group;D_6__metagenome"                                          
## [4] "D_0__Bacteria;D_1__Firmicutes;D_2__Clostridia;D_3__Clostridiales;D_4__Lachnospiraceae;D_5__uncultured;D_6__mouse gut metagenome"                                                       
## [5] "D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3__Lactobacillales;D_4__Lactobacillaceae;D_5__Lactobacillus;D_6__mouse gut metagenome"                                                    
## [6] "D_0__Bacteria;D_1__Firmicutes;D_2__Clostridia;D_3__Clostridiales;D_4__Ruminococcaceae;D_5__Ruminococcus 1;D_6__mouse gut metagenome"
#Phenotype file. Note that Sample ID column should be a subset of the second to last column names of data_input.
pheno<-read.csv(phenofile,stringsAsFactors=F,check.names = F)
head(pheno)
##   Sample ID Group AGE Gender BMI
## 1 ERR475467     0  66      M  24
## 2 ERR475468     0  74      M  27
## 3 ERR475469     0  54      M  26
## 4 ERR475470     0  65      M  30
## 5 ERR475471     0  57      M  24
## 6 ERR475473     0  53      F  30
#Tree file. Note that the set of first column of data_input should be a subset of the set of tip labels of this object.
tree<-try(read.tree(file = file_tree, text = NULL, tree.names = NULL, skip = 0,comment.char = "#", keep.multi = FALSE))
tree
## 
## Phylogenetic tree with 191411 tips and 191407 internal nodes.
## 
## Tip labels:
##  FJ879449.1.1495, EF098973.1.1393, EU465088.1.1396, FJ951876.1.1496, EU771949.1.1390, EU464360.1.1286, ...
## Node labels:
##  , 0.986, 0.853, 0.977, 0.972, 0.901, ...
## 
## Unrooted; includes branch lengths.
#Run TMAT and get FDR for each genera.
output<-TMAT(data_input,pheno,tree,taxonomy_full,plot=F)
head(output$FDR_Table[order(output$FDR_Table$FDR,decreasing=F),])
##               Genus      p-value         FDR
## 33    Fusobacterium 2.318981e-05 0.001692856
## 42   Lysinibacillus 3.529025e-04 0.012880940
## 13     Anaerostipes 2.400013e-03 0.043800242
## 55        Roseburia 2.034615e-03 0.043800242
## 68    Streptococcus 3.443509e-03 0.050275234
## 32 Fusicatenibacter 9.560862e-03 0.103609159
#Investigate the information for statistics of all the  internal nodes  and covariate-adjustments for Streptococcus.
ind_investigate<-which(output$FDR_Table$Genus=="Roseburia")
output$Node_Statistics_Details[[ind_investigate]]
## $T1
## $T1$Summary
##                   Estimate Statistic df1 df2     p.value         FDR
## F-test                  NA  10.79486   1 110 0.001366008 0.002732016
## Chi-squared Test        NA  10.79486   1  NA 0.001017826 0.002035651
## [Y = 1 <-> 0]    0.3100566        NA  NA  NA          NA          NA
## 
## $T1$Covariates_Summary
##                                 Estimate  Statistic df1 df2   p.value
## intercept.F-test                      NA 0.03169942   1 110 0.8590164
## intercept.Chi-squared Test            NA 0.03169942   1  NA 0.8586890
## intercept.[Y = 1 <-> 0]     0.0013408169         NA  NA  NA        NA
## AGE.F-test                            NA 1.26360629   1 110 0.2634167
## AGE.Chi-squared Test                  NA 1.26360629   1  NA 0.2609696
## AGE.[Y = 1 <-> 0]           0.0001815553         NA  NA  NA        NA
## Gender.F-test                         NA 0.46359631   1 110 0.4973788
## Gender.Chi-squared Test               NA 0.46359631   1  NA 0.4959480
## Gender.[Y = 1 <-> 0]        0.0086851549         NA  NA  NA        NA
## BMI.F-test                            NA 1.82260230   1 110 0.1797753
## BMI.Chi-squared Test                  NA 1.82260230   1  NA 0.1770038
## BMI.[Y = 1 <-> 0]          -0.0005815856         NA  NA  NA        NA
## 
## 
## $T0
## $T0$Summary
##                    Estimate Statistic df1 df2    p.value        FDR
## F-test                   NA  4.250753   1 110 0.04158970 0.04158970
## Chi-squared Test         NA  4.250753   1  NA 0.03923292 0.03923292
## [Y = 1 <-> 0]    -0.1026349        NA  NA  NA         NA         NA
## 
## $T0$Covariates_Summary
##                                 Estimate   Statistic df1 df2   p.value
## intercept.F-test                      NA 0.005778402   1 110 0.9395446
## intercept.Chi-squared Test            NA 0.005778402   1  NA 0.9394065
## intercept.[Y = 1 <-> 0]     0.0003117437          NA  NA  NA        NA
## AGE.F-test                            NA 1.439175881   1 110 0.2328503
## AGE.Chi-squared Test                  NA 1.439175881   1  NA 0.2302727
## AGE.[Y = 1 <-> 0]          -0.0001056117          NA  NA  NA        NA
## Gender.F-test                         NA 0.049344128   1 110 0.8246204
## Gender.Chi-squared Test               NA 0.049344128   1  NA 0.8242085
## Gender.[Y = 1 <-> 0]        0.0015403054          NA  NA  NA        NA
## BMI.F-test                            NA 0.370938146   1 110 0.5437492
## BMI.Chi-squared Test                  NA 0.370938146   1  NA 0.5424933
## BMI.[Y = 1 <-> 0]           0.0001419466          NA  NA  NA        NA
#Get plots for significant genera. P-values for each nodes in this plot are FDR corrected.
output<-TMAT(data_input,pheno,tree,taxonomy_full,plot=T)