% Journal article: Marshall (2010), Systematic Biology 59(1): 108--117.
% Empty keywords/url/pmid fields from the export were dropped; the DOI is the identifier.
% NOTE(review): the same citation key appears again further down this file; keep only one copy when importing.
@article{TreeBASE2Ref18654,
  author   = {Marshall, David C.},
  title    = {Cryptic Failure of Partitioned {Bayesian} Phylogenetic Analyses: Lost in the Land of Long Trees},
  journal  = {Systematic Biology},
  year     = {2010},
  volume   = {59},
  number   = {1},
  pages    = {108--117},
  doi      = {10.1093/sysbio/syp080},
  abstract = {Partitioned Bayesian phylogenetic analyses of routine genetic datasets, constructed using MrBayes (Ronquist and Huelsenbeck, 2003), can become trapped in regions of parameter space characterized by unrealistically long trees and distorted partition-rate multipliers. Such analyses commonly fail to reach stationarity during hundreds of millions of generations of sampling many times longer than most published analyses. Some datasets are so prone to this problem that paired MrBayes runs begun from different starting trees repeatedly find the same incorrect long-tree solutions and consequently pass the most commonly employed tests of stationarity, including the ASDSF and PSRF statistics offered by MrBayes (Gelman and Rubin, 1992). In these situations, failure to reach stationarity is recognizable only in light of prior knowledge of model parameters, such as the expectation that third-codon-position sites usually evolve fastest in protein-coding genes. The conditions that lead to the long-tree problem are frequently encountered in phylogenetic studies today, and I present six demonstration examples from the literature. Although the effects on tree length are often dramatic, effects on topology appear to be subtle. Susceptibility to the problem is sometimes predicted by the difference between the true tree length and the starting tree length. In some cases, the problems described here can be avoided or reduced by manipulation of the starting tree length and/or by adjustments to the prior on branch lengths. In more difficult situations, accurate branch length estimation may not be possible with Bayesian methods because of dependence of the solution on the branch length prior.},
}
Citation for Study 10163
Citation title:
"Cryptic Failure of Partitioned Bayesian Phylogenetic Analyses: Lost in the Land of Long Trees".
This study was previously identified under the legacy study ID S2507
(Status: Published).
Citation
Marshall D.C. 2010. Cryptic Failure of Partitioned Bayesian Phylogenetic Analyses: Lost in the Land of Long Trees. Systematic Biology, 59(1): 108-117.
Authors
Abstract
Partitioned Bayesian phylogenetic analyses of routine genetic datasets, constructed using MrBayes (Ronquist and Huelsenbeck, 2003), can become trapped in regions of parameter space characterized by unrealistically long trees and distorted partition-rate multipliers. Such analyses commonly fail to reach stationarity during hundreds of millions of generations of sampling, many times longer than most published analyses. Some datasets are so prone to this problem that paired MrBayes runs begun from different starting trees repeatedly find the same incorrect long-tree solutions and consequently pass the most commonly employed tests of stationarity, including the ASDSF and PSRF statistics offered by MrBayes (Gelman and Rubin, 1992). In these situations, failure to reach stationarity is recognizable only in light of prior knowledge of model parameters, such as the expectation that third-codon-position sites usually evolve fastest in protein-coding genes. The conditions that lead to the long-tree problem are frequently encountered in phylogenetic studies today, and I present six demonstration examples from the literature. Although the effects on tree length are often dramatic, effects on topology appear to be subtle. Susceptibility to the problem is sometimes predicted by the difference between the true tree length and the starting tree length. In some cases, the problems described here can be avoided or reduced by manipulation of the starting tree length and/or by adjustments to the prior on branch lengths. In more difficult situations, accurate branch length estimation may not be possible with Bayesian methods because of dependence of the solution on the branch length prior.
External links
About this resource
- Canonical resource URI:
http://purl.org/phylo/treebase/phylows/study/TB2:S10163
- Other versions:
Nexus
NeXML
- Show BibTeX reference
% Journal article: Marshall (2010), Systematic Biology 59(1): 108--117.
% Empty keywords/url/pmid fields from the export were dropped; the DOI is the identifier.
% NOTE(review): this record reuses the citation key TreeBASE2Ref18654 of the entry at the top of this file;
% BibTeX reports a repeated entry, so keep only one copy when importing.
@article{TreeBASE2Ref18654,
  author   = {Marshall, David C.},
  title    = {Cryptic Failure of Partitioned {Bayesian} Phylogenetic Analyses: Lost in the Land of Long Trees},
  journal  = {Systematic Biology},
  year     = {2010},
  volume   = {59},
  number   = {1},
  pages    = {108--117},
  doi      = {10.1093/sysbio/syp080},
  abstract = {Partitioned Bayesian phylogenetic analyses of routine genetic datasets, constructed using MrBayes (Ronquist and Huelsenbeck, 2003), can become trapped in regions of parameter space characterized by unrealistically long trees and distorted partition-rate multipliers. Such analyses commonly fail to reach stationarity during hundreds of millions of generations of sampling many times longer than most published analyses. Some datasets are so prone to this problem that paired MrBayes runs begun from different starting trees repeatedly find the same incorrect long-tree solutions and consequently pass the most commonly employed tests of stationarity, including the ASDSF and PSRF statistics offered by MrBayes (Gelman and Rubin, 1992). In these situations, failure to reach stationarity is recognizable only in light of prior knowledge of model parameters, such as the expectation that third-codon-position sites usually evolve fastest in protein-coding genes. The conditions that lead to the long-tree problem are frequently encountered in phylogenetic studies today, and I present six demonstration examples from the literature. Although the effects on tree length are often dramatic, effects on topology appear to be subtle. Susceptibility to the problem is sometimes predicted by the difference between the true tree length and the starting tree length. In some cases, the problems described here can be avoided or reduced by manipulation of the starting tree length and/or by adjustments to the prior on branch lengths. In more difficult situations, accurate branch length estimation may not be possible with Bayesian methods because of dependence of the solution on the branch length prior.},
}
- Show RIS reference
TY - JOUR
ID - 18654
AU - Marshall,David C.
T1 - Cryptic Failure of Partitioned Bayesian Phylogenetic Analyses: Lost in the Land of Long Trees
PY - 2010
UR - http://dx.doi.org/10.1093/sysbio/syp080
N2 - Partitioned Bayesian phylogenetic analyses of routine genetic datasets, constructed using MrBayes (Ronquist and Huelsenbeck, 2003), can become trapped in regions of parameter space characterized by unrealistically long trees and distorted partition-rate multipliers. Such analyses commonly fail to reach stationarity during hundreds of millions of generations of sampling many times longer than most published analyses. Some datasets are so prone to this problem that paired MrBayes runs begun from different starting trees repeatedly find the same incorrect long-tree solutions and consequently pass the most commonly employed tests of stationarity, including the ASDSF and PSRF statistics offered by MrBayes (Gelman and Rubin, 1992). In these situations, failure to reach stationarity is recognizable only in light of prior knowledge of model parameters, such as the expectation that third-codon-position sites usually evolve fastest in protein-coding genes. The conditions that lead to the long-tree problem are frequently encountered in phylogenetic studies today, and I present six demonstration examples from the literature. Although the effects on tree length are often dramatic, effects on topology appear to be subtle. Susceptibility to the problem is sometimes predicted by the difference between the true tree length and the starting tree length. In some cases, the problems described here can be avoided or reduced by manipulation of the starting tree length and/or by adjustments to the prior on branch lengths. In more difficult situations, accurate branch length estimation may not be possible with Bayesian methods because of dependence of the solution on the branch length prior.
L3 - 10.1093/sysbio/syp080
JF - Systematic Biology
VL - 59
IS - 1
SP - 108
EP - 117
ER -