Version: 1.0.1
Title: Phylogenetic Tree-based Microbiome Association TEST (TMAT)
R package Dependancies: ggplot2
raster
stringr
ape
grid
edgeR
parallel
URL: http://healthstat.snu.ac.kr/software/tmat/
Maintainer: Kangjin Kim <kangjin.kim1109@gmail.com>
Obtain TMAT p-value and plot
TMAT(Data_Read,Pheno,Tree,Taxonomy_Input,Response_Type="categorical",ncore=1,db="silva",TestType="Chi-squared",TMAT_type="M",fdr_cutoff=0.05,plot=T)
data.frame
of metagenome data whose rows corresponds to OTUs. Its first column indicates unique OTU ID corresponding to Tree object. The other columns has absolute abundances of OTUs for each samples. Column names should corresponds to first column of Pheno
object. Example file: http://viva1109.iptime.org/paper/TMAT/otu_comp_new_silva.txtdata.frame
of phenotye data whose rows corresponds to the samples. First column, second coumn and the other columns represent sample ID, response variable and covariates to adjust. First column should be a subset of column names of Data_Read
object. Example file: http://viva1109.iptime.org/paper/TMAT/pheno_togo.csvData_Read
object. Example file: http://viva1109.iptime.org/paper/TMAT/97_otus.treData_Read
object."categorical"
or "gaussian"
.1
). Option with >1 value is not available for windows OS system."silva"
for Silva database or "ez"
for Ez-Taxon database."Chi-squared"
or "F-test"
"M"
for TMATM or "IM"
for TMATIM.TRUE
, this option returns plots with significant genera. Only available when Response_Type
is "categorical"
.TMAT returns an object of class list
containing the following compoenents.
data.frame
with the following columns.$Genus
: Genus taxonomy.$p-value
: TMAT p-values.$FDR
: False discovery rate for each genera.
list
with the following components.$"name of genus"$"name of test node"$Summary
: Information of the test statistics for each test nodes.$"name of genus"$"name of test node"$Covariates_Summary
: Information for covariate adjustment for each test nodes.
source("https://raw.githubusercontent.com/viva1109/TMAT/main/R/TMAT.R",encoding = "UTF-8")
data_d<-"https://raw.githubusercontent.com/viva1109/TMAT/main/"
#Choose a database choose either "silva" or "ez".
db<-"silva"
print(db)
## [1] "silva"
if(str_detect(db,"ez")){
tt_d<-"/data/otu_comp_new_ez.txt"
tt_tree<-"/tree/ez_sina_fastmp.tre"
}
if(str_detect(db,"silva")){
tt_d<-"/data/otu_comp_new_silva.txt"
tt_tree<-"/tree/97_otus.tre"
}
data_list_nm<-paste0(data_d,tt_d)
phenofile<-paste0(data_d,"/data/pheno_togo.csv")
file_tree<-paste0(data_d,tt_tree)
#Check the file paths according to the chosen database
print(c(data_list_nm,file_tree,phenofile))
## [1] "https://raw.githubusercontent.com/viva1109/TMAT/main/data/otu_comp_new_silva.txt"
## [2] "https://raw.githubusercontent.com/viva1109/TMAT/main/tree/97_otus.tre"
## [3] "https://raw.githubusercontent.com/viva1109/TMAT/main/data/pheno_togo.csv"
#Metagenome file.
data_read<-read.delim(data_list_nm,stringsAsFactors=F,check.names = F)
data_input<-data_read[,-dim(data_read)[2]]
taxonomy_full<-data_read[,dim(data_read)[2]]
data_input[1:6,1:6]
## X.OTU.ID ERR475467 ERR475468 ERR475469 ERR475470 ERR475471
## 1 AAAA02020713.1.1297 1 3 0 1 0
## 2 AACY023846778.1.1273 0 0 0 0 0
## 3 AAQL01001287.5.1442 0 1 0 0 0
## 4 AATC01000018.2.1513 0 0 0 0 0
## 5 AATE01000163.205.1482 0 0 0 0 0
## 6 AATF01000761.367.1874 0 0 0 0 0
#This taxonomy information corresponds to the rows of data_input.
head(taxonomy_full)
## [1] "D_0__Bacteria;D_1__Proteobacteria;D_2__Gammaproteobacteria;D_3__Enterobacteriales;D_4__Enterobacteriaceae;D_5__Escherichia-Shigella;D_6__Oryza sativa Indica Group (long-grained rice)"
## [2] "D_0__Bacteria;D_1__Proteobacteria;D_2__Alphaproteobacteria;D_3__Rhodobacterales;D_4__Rhodobacteraceae;D_5__uncultured;D_6__marine metagenome"
## [3] "D_0__Bacteria;D_1__Firmicutes;D_2__Clostridia;D_3__Clostridiales;D_4__Christensenellaceae;D_5__Christensenellaceae R-7 group;D_6__metagenome"
## [4] "D_0__Bacteria;D_1__Firmicutes;D_2__Clostridia;D_3__Clostridiales;D_4__Lachnospiraceae;D_5__uncultured;D_6__mouse gut metagenome"
## [5] "D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3__Lactobacillales;D_4__Lactobacillaceae;D_5__Lactobacillus;D_6__mouse gut metagenome"
## [6] "D_0__Bacteria;D_1__Firmicutes;D_2__Clostridia;D_3__Clostridiales;D_4__Ruminococcaceae;D_5__Ruminococcus 1;D_6__mouse gut metagenome"
#Phenotype file. Note that Sample ID column should be a subset of the second to last column names of data_input.
pheno<-read.csv(phenofile,stringsAsFactors=F,check.names = F)
head(pheno)
## Sample ID Group AGE Gender BMI
## 1 ERR475467 0 66 M 24
## 2 ERR475468 0 74 M 27
## 3 ERR475469 0 54 M 26
## 4 ERR475470 0 65 M 30
## 5 ERR475471 0 57 M 24
## 6 ERR475473 0 53 F 30
#Tree file. Note that the set of first column of data_input should be a subset of the set of tip labels of this object.
tree<-try(read.tree(file = file_tree, text = NULL, tree.names = NULL, skip = 0,comment.char = "#", keep.multi = FALSE))
tree
##
## Phylogenetic tree with 191411 tips and 191407 internal nodes.
##
## Tip labels:
## FJ879449.1.1495, EF098973.1.1393, EU465088.1.1396, FJ951876.1.1496, EU771949.1.1390, EU464360.1.1286, ...
## Node labels:
## , 0.986, 0.853, 0.977, 0.972, 0.901, ...
##
## Unrooted; includes branch lengths.
#Run TMAT and get FDR for each genera.
output<-TMAT(data_input,pheno,tree,taxonomy_full,plot=F)
head(output$FDR_Table[order(output$FDR_Table$FDR,decreasing=F),])
## Genus p-value FDR
## 33 Fusobacterium 2.318981e-05 0.001692856
## 42 Lysinibacillus 3.529025e-04 0.012880940
## 13 Anaerostipes 2.400013e-03 0.043800242
## 55 Roseburia 2.034615e-03 0.043800242
## 68 Streptococcus 3.443509e-03 0.050275234
## 32 Fusicatenibacter 9.560862e-03 0.103609159
#Investigate the information for statistics of all the internal nodes and covariate-adjustments for Streptococcus.
ind_investigate<-which(output$FDR_Table$Genus=="Roseburia")
output$Node_Statistics_Details[[ind_investigate]]
## $T1
## $T1$Summary
## Estimate Statistic df1 df2 p.value FDR
## F-test NA 10.79486 1 110 0.001366008 0.002732016
## Chi-squared Test NA 10.79486 1 NA 0.001017826 0.002035651
## [Y = 1 <-> 0] 0.3100566 NA NA NA NA NA
##
## $T1$Covariates_Summary
## Estimate Statistic df1 df2 p.value
## intercept.F-test NA 0.03169942 1 110 0.8590164
## intercept.Chi-squared Test NA 0.03169942 1 NA 0.8586890
## intercept.[Y = 1 <-> 0] 0.0013408169 NA NA NA NA
## AGE.F-test NA 1.26360629 1 110 0.2634167
## AGE.Chi-squared Test NA 1.26360629 1 NA 0.2609696
## AGE.[Y = 1 <-> 0] 0.0001815553 NA NA NA NA
## Gender.F-test NA 0.46359631 1 110 0.4973788
## Gender.Chi-squared Test NA 0.46359631 1 NA 0.4959480
## Gender.[Y = 1 <-> 0] 0.0086851549 NA NA NA NA
## BMI.F-test NA 1.82260230 1 110 0.1797753
## BMI.Chi-squared Test NA 1.82260230 1 NA 0.1770038
## BMI.[Y = 1 <-> 0] -0.0005815856 NA NA NA NA
##
##
## $T0
## $T0$Summary
## Estimate Statistic df1 df2 p.value FDR
## F-test NA 4.250753 1 110 0.04158970 0.04158970
## Chi-squared Test NA 4.250753 1 NA 0.03923292 0.03923292
## [Y = 1 <-> 0] -0.1026349 NA NA NA NA NA
##
## $T0$Covariates_Summary
## Estimate Statistic df1 df2 p.value
## intercept.F-test NA 0.005778402 1 110 0.9395446
## intercept.Chi-squared Test NA 0.005778402 1 NA 0.9394065
## intercept.[Y = 1 <-> 0] 0.0003117437 NA NA NA NA
## AGE.F-test NA 1.439175881 1 110 0.2328503
## AGE.Chi-squared Test NA 1.439175881 1 NA 0.2302727
## AGE.[Y = 1 <-> 0] -0.0001056117 NA NA NA NA
## Gender.F-test NA 0.049344128 1 110 0.8246204
## Gender.Chi-squared Test NA 0.049344128 1 NA 0.8242085
## Gender.[Y = 1 <-> 0] 0.0015403054 NA NA NA NA
## BMI.F-test NA 0.370938146 1 110 0.5437492
## BMI.Chi-squared Test NA 0.370938146 1 NA 0.5424933
## BMI.[Y = 1 <-> 0] 0.0001419466 NA NA NA NA
#Get plots for significant genera. P-values for each nodes in this plot are FDR corrected.
output<-TMAT(data_input,pheno,tree,taxonomy_full,plot=T)