@comment{Journal article cited by TreeBASE study S1019. The same citation key
  is repeated further down in this file; keep only one copy when loading the
  file into BibTeX or Biber, which flag repeated keys.}
@article{TreeBASE2Ref17720,
  author   = {Susko, Edward and Field, Chris and Blouin, Christian and Roger, Andrew J.},
  title    = {Estimation of Rates-Across-Sites Distributions in Phylogenetic Substitution Models},
  journal  = {Systematic Biology},
  year     = {2003},
  volume   = {52},
  number   = {5},
  pages    = {594--603},
  doi      = {10.1080/10635150390235395},
  abstract = {Previous work has shown that it is often essential to account for the variation in rates at different sites in phylogenetic models in order to avoid phylogenetic artifacts such as long branch attraction. In most current models, the gamma distribution is used for the rates-across-sites distributions and is implemented as an equal-probability discrete gamma. In this paper, we introduce discrete distribution estimates with large numbers of equally spaced rate categories allowing us to investigate the appropriateness of the gamma model. With large numbers of rate categories, these discrete estimates are flexible enough to approximate the shape of almost any distribution. Likelihood ratio statistic tests and a nonparametric bootstrap confidence bound estimation procedure based on the discrete estimates are presented that can be used to test the fit of a parametric family. We apply the methodology to several different protein data sets and find that although the gamma model often provides a good parametric model for this type of data, rate estimates from an equal-probability discrete gamma model with a small number of categories will tend to underestimate the largest rates. In cases when the gamma model assumption is in doubt, rate estimates coming from the discrete rate distribution estimate with a large number of rate categories provide a robust alternative to gamma estimates. An alternative implementation of the gamma distribution is proposed that, for equal numbers of rate categories, is computationally more efficient during optimization than the standard gamma implementation and can provide more accurate estimates of site rates.},
}
Citation for Study 1019
Citation title:
"Estimation of Rates-Across-Sites Distributions in Phylogenetic Substitution Models."
This study was previously identified under the legacy study ID S910
(Status: Published).
Citation
Susko E., Field C., Blouin C., & Roger A. 2003. Estimation of Rates-Across-Sites Distributions in Phylogenetic Substitution Models. Systematic Biology, 52(5): 594-603.
Authors
- Susko E.
- Field C.
- Blouin C.
- Roger A.
Abstract
Previous work has shown that it is often essential to account for the variation in rates at different sites in phylogenetic models in order to avoid phylogenetic artifacts such as long branch attraction. In most current models, the gamma distribution is used for the rates-across-sites distributions and is implemented as an equal-probability discrete gamma. In this paper, we introduce discrete distribution estimates with large numbers of equally spaced rate categories allowing us to investigate the appropriateness of the gamma model. With large numbers of rate categories, these discrete estimates are flexible enough to approximate the shape of almost any distribution. Likelihood ratio statistic tests and a nonparametric bootstrap confidence bound estimation procedure based on the discrete estimates are presented that can be used to test the fit of a parametric family. We apply the methodology to several different protein data sets and find that although the gamma model often provides a good parametric model for this type of data, rate estimates from an equal-probability discrete gamma model with a small number of categories will tend to underestimate the largest rates. In cases when the gamma model assumption is in doubt, rate estimates coming from the discrete rate distribution estimate with a large number of rate categories provide a robust alternative to gamma estimates. An alternative implementation of the gamma distribution is proposed that, for equal numbers of rate categories, is computationally more efficient during optimization than the standard gamma implementation and can provide more accurate estimates of site rates.
External links
About this resource
- Canonical resource URI:
http://purl.org/phylo/treebase/phylows/study/TB2:S1019
- Other versions:
Nexus
NeXML
- Show BibTeX reference
@comment{Journal article cited by TreeBASE study S1019. This entry reuses the
  citation key of the entry at the top of this file; keep only one copy when
  loading the file into BibTeX or Biber, which flag repeated keys.}
@article{TreeBASE2Ref17720,
  author   = {Susko, Edward and Field, Chris and Blouin, Christian and Roger, Andrew J.},
  title    = {Estimation of Rates-Across-Sites Distributions in Phylogenetic Substitution Models},
  journal  = {Systematic Biology},
  year     = {2003},
  volume   = {52},
  number   = {5},
  pages    = {594--603},
  doi      = {10.1080/10635150390235395},
  abstract = {Previous work has shown that it is often essential to account for the variation in rates at different sites in phylogenetic models in order to avoid phylogenetic artifacts such as long branch attraction. In most current models, the gamma distribution is used for the rates-across-sites distributions and is implemented as an equal-probability discrete gamma. In this paper, we introduce discrete distribution estimates with large numbers of equally spaced rate categories allowing us to investigate the appropriateness of the gamma model. With large numbers of rate categories, these discrete estimates are flexible enough to approximate the shape of almost any distribution. Likelihood ratio statistic tests and a nonparametric bootstrap confidence bound estimation procedure based on the discrete estimates are presented that can be used to test the fit of a parametric family. We apply the methodology to several different protein data sets and find that although the gamma model often provides a good parametric model for this type of data, rate estimates from an equal-probability discrete gamma model with a small number of categories will tend to underestimate the largest rates. In cases when the gamma model assumption is in doubt, rate estimates coming from the discrete rate distribution estimate with a large number of rate categories provide a robust alternative to gamma estimates. An alternative implementation of the gamma distribution is proposed that, for equal numbers of rate categories, is computationally more efficient during optimization than the standard gamma implementation and can provide more accurate estimates of site rates.},
}
- Show RIS reference
TY  - JOUR
ID  - 17720
AU  - Susko,Edward
AU  - Field,Chris
AU  - Blouin,Christian
AU  - Roger,Andrew J.
T1  - Estimation of Rates-Across-Sites Distributions in Phylogenetic Substitution Models
PY  - 2003
UR  - http://dx.doi.org/10.1080/10635150390235395
N2  - Previous work has shown that it is often essential to account for the variation in rates at different sites in phylogenetic models in order to avoid phylogenetic artifacts such as long branch attraction. In most current models, the gamma distribution is used for the rates-across-sites distributions and is implemented as an equal-probability discrete gamma. In this paper, we introduce discrete distribution estimates with large numbers of equally spaced rate categories allowing us to investigate the appropriateness of the gamma model. With large numbers of rate categories, these discrete estimates are flexible enough to approximate the shape of almost any distribution. Likelihood ratio statistic tests and a nonparametric bootstrap confidence bound estimation procedure based on the discrete estimates are presented that can be used to test the fit of a parametric family. We apply the methodology to several different protein data sets and find that although the gamma model often provides a good parametric model for this type of data, rate estimates from an equal-probability discrete gamma model with a small number of categories will tend to underestimate the largest rates. In cases when the gamma model assumption is in doubt, rate estimates coming from the discrete rate distribution estimate with a large number of rate categories provide a robust alternative to gamma estimates. An alternative implementation of the gamma distribution is proposed that, for equal numbers of rate categories, is computationally more efficient during optimization than the standard gamma implementation and can provide more accurate estimates of site rates.
L3  - 10.1080/10635150390235395
JF  - Systematic Biology
VL  - 52
IS  - 5
SP  - 594
EP  - 603
ER  - 