% Journal article cited by TreeBASE study S10436 (citation key TreeBASE2Ref18897).
% Author names are stored in the unambiguous "Last, First" form.
% NOTE(review): the url is an advance-access reprint link ("msq097v1") and
% pages 1--10 look like a pre-pagination placeholder -- confirm volume, number
% and pages against the DOI before citing.
@article{TreeBASE2Ref18897,
  author   = {Nesnidal, Maximilian P. and Helmkampf, Martin and Bruchhaus, Iris and Hausdorf, Bernhard},
  title    = {Compositional Heterogeneity and Phylogenomic Inference of Metazoan Relationships},
  journal  = {Molecular Biology and Evolution},
  year     = {2010},
  volume   = {27},
  number   = {6},
  pages    = {1--10},
  doi      = {10.1093/molbev/msq097},
  pmid     = {20382658},
  url      = {http://mbe.oxfordjournals.org/cgi/reprint/msq097v1},
  keywords = {Metazoa, phylogenomics, compositional bias},
  abstract = {Compositional heterogeneity of sequences between taxa may cause systematic error in phylogenetic inference. The potential influence of such bias might be mitigated by strategies to reduce compositional heterogeneity in the dataset, or by phylogeny reconstruction methods that account for compositional heterogeneity. We adopted several of these strategies to analyze a large ribosomal protein dataset representing all major metazoan taxa. Posterior predictive tests revealed that there is compositional bias in this dataset. Only a few taxa with strongly deviating amino acid composition had to be excluded to reduce this bias. Thus, this is a good solution, if these taxa are not central to the phylogenetic question at hand. Deleting individual proteins from the data matrix may be an appropriate method, if compositional heterogeneity among taxa is concentrated in a few proteins. However, half of the ribosomal proteins had to be excluded to reduce the compositional heterogeneity to a degree that the CAT model was no longer significantly violated. Recoding of amino acids into groups is another alternative, but causes a loss of information and may result in badly resolved trees as demonstrated by the present dataset. Bayesian inference with the CAT-BP model directly accounts for compositional heterogeneity between lineages by introducing breakpoints along the branches of the phylogeny at which the amino acid composition is allowed to change, but is computationally expensive. Finally, a neighbor-joining tree based on equal input distances that consider pattern and rate heterogeneity showed several unusual groupings, which are most likely artifacts, probably caused by the loss of information resulting from the transformation of the sequence data into distances. 
As long as no more efficient phylogenetic inference methods are available that can directly account for compositional heterogeneity in large datasets, using methods for reducing compositional heterogeneity in the data in combination with methods that assume a stationary amino acid composition remains an option for controlling systematic errors in tree reconstruction that result from compositional bias. Our analyses indicated that the paraphyly of Deuterostomia in some analyses is the result of systematic errors that also affected the relationships of Entoprocta and Ectoprocta.},
}
Citation for Study 10436
Citation title:
"Compositional Heterogeneity and Phylogenomic Inference of Metazoan Relationships".
Study name:
"Compositional Heterogeneity and Phylogenomic Inference of Metazoan Relationships".
This study is part of submission 10426
(Status: Published).
Citation
Nesnidal M., Helmkampf M., Bruchhaus I., & Hausdorf B. 2010. Compositional Heterogeneity and Phylogenomic Inference of Metazoan Relationships. Molecular Biology and Evolution, 27(6): 1-10.
Authors
- Nesnidal M.
- Helmkampf M.
- Bruchhaus I.
- Hausdorf B.
Abstract
Compositional heterogeneity of sequences between taxa may cause systematic error in phylogenetic inference. The potential influence of such bias might be mitigated by strategies to reduce compositional heterogeneity in the dataset, or by phylogeny reconstruction methods that account for compositional heterogeneity. We adopted several of these strategies to analyze a large ribosomal protein dataset representing all major metazoan taxa. Posterior predictive tests revealed that there is compositional bias in this dataset. Only a few taxa with strongly deviating amino acid composition had to be excluded to reduce this bias. Thus, this is a good solution, if these taxa are not central to the phylogenetic question at hand. Deleting individual proteins from the data matrix may be an appropriate method, if compositional heterogeneity among taxa is concentrated in a few proteins. However, half of the ribosomal proteins had to be excluded to reduce the compositional heterogeneity to a degree that the CAT model was no longer significantly violated. Recoding of amino acids into groups is another alternative, but causes a loss of information and may result in badly resolved trees as demonstrated by the present dataset. Bayesian inference with the CAT-BP model directly accounts for compositional heterogeneity between lineages by introducing breakpoints along the branches of the phylogeny at which the amino acid composition is allowed to change, but is computationally expensive. Finally, a neighbor-joining tree based on equal input distances that consider pattern and rate heterogeneity showed several unusual groupings, which are most likely artifacts, probably caused by the loss of information resulting from the transformation of the sequence data into distances. 
As long as no more efficient phylogenetic inference methods are available that can directly account for compositional heterogeneity in large datasets, using methods for reducing compositional heterogeneity in the data in combination with methods that assume a stationary amino acid composition remains an option for controlling systematic errors in tree reconstruction that result from compositional bias. Our analyses indicated that the paraphyly of Deuterostomia in some analyses is the result of systematic errors that also affected the relationships of Entoprocta and Ectoprocta.
Keywords
Metazoa, phylogenomics, compositional bias
External links
About this resource
- Canonical resource URI:
http://purl.org/phylo/treebase/phylows/study/TB2:S10436
- Other versions:
Nexus
NeXML
- Show BibTeX reference
% Display-only copy of the BibTeX reference. An active entry with the same
% citation key (TreeBASE2Ref18897) already exists earlier in this file, and a
% second active definition makes BibTeX/Biber report a repeated entry. The
% leading at-sign has therefore been removed so this copy is ignored by the
% parser; restore it only if the other definition is deleted.
ARTICLE{TreeBASE2Ref18897,
author = {Maximilian P. Nesnidal and Martin Helmkampf and Iris Bruchhaus and Bernhard Hausdorf},
title = {Compositional Heterogeneity and Phylogenomic Inference of Metazoan Relationships},
year = {2010},
keywords = {Metazoa, phylogenomics, compositional bias},
doi = {10.1093/molbev/msq097},
url = {http://mbe.oxfordjournals.org/cgi/reprint/msq097v1},
pmid = {20382658},
journal = {Molecular Biology and Evolution},
volume = {27},
number = {6},
pages = {1--10},
abstract = {Compositional heterogeneity of sequences between taxa may cause systematic error in phylogenetic inference. The potential influence of such bias might be mitigated by strategies to reduce compositional heterogeneity in the dataset, or by phylogeny reconstruction methods that account for compositional heterogeneity. We adopted several of these strategies to analyze a large ribosomal protein dataset representing all major metazoan taxa. Posterior predictive tests revealed that there is compositional bias in this dataset. Only a few taxa with strongly deviating amino acid composition had to be excluded to reduce this bias. Thus, this is a good solution, if these taxa are not central to the phylogenetic question at hand. Deleting individual proteins from the data matrix may be an appropriate method, if compositional heterogeneity among taxa is concentrated in a few proteins. However, half of the ribosomal proteins had to be excluded to reduce the compositional heterogeneity to a degree that the CAT model was no longer significantly violated. Recoding of amino acids into groups is another alternative, but causes a loss of information and may result in badly resolved trees as demonstrated by the present dataset. Bayesian inference with the CAT-BP model directly accounts for compositional heterogeneity between lineages by introducing breakpoints along the branches of the phylogeny at which the amino acid composition is allowed to change, but is computationally expensive. Finally, a neighbor-joining tree based on equal input distances that consider pattern and rate heterogeneity showed several unusual groupings, which are most likely artifacts, probably caused by the loss of information resulting from the transformation of the sequence data into distances. 
As long as no more efficient phylogenetic inference methods are available that can directly account for compositional heterogeneity in large datasets, using methods for reducing compositional heterogeneity in the data in combination with methods that assume a stationary amino acid composition remains an option for controlling systematic errors in tree reconstruction that result from compositional bias. Our analyses indicated that the paraphyly of Deuterostomia in some analyses is the result of systematic errors that also affected the relationships of Entoprocta and Ectoprocta.}
}
- Show RIS reference
TY  - JOUR
ID  - 18897
AU  - Nesnidal,Maximilian P.
AU  - Helmkampf,Martin
AU  - Bruchhaus,Iris
AU  - Hausdorf,Bernhard
T1  - Compositional Heterogeneity and Phylogenomic Inference of Metazoan Relationships
PY  - 2010
KW  - Metazoa
KW  - phylogenomics
KW  - compositional bias
UR  - http://mbe.oxfordjournals.org/cgi/reprint/msq097v1
N2  - Compositional heterogeneity of sequences between taxa may cause systematic error in phylogenetic inference. The potential influence of such bias might be mitigated by strategies to reduce compositional heterogeneity in the dataset, or by phylogeny reconstruction methods that account for compositional heterogeneity. We adopted several of these strategies to analyze a large ribosomal protein dataset representing all major metazoan taxa. Posterior predictive tests revealed that there is compositional bias in this dataset. Only a few taxa with strongly deviating amino acid composition had to be excluded to reduce this bias. Thus, this is a good solution, if these taxa are not central to the phylogenetic question at hand. Deleting individual proteins from the data matrix may be an appropriate method, if compositional heterogeneity among taxa is concentrated in a few proteins. However, half of the ribosomal proteins had to be excluded to reduce the compositional heterogeneity to a degree that the CAT model was no longer significantly violated. Recoding of amino acids into groups is another alternative, but causes a loss of information and may result in badly resolved trees as demonstrated by the present dataset. Bayesian inference with the CAT-BP model directly accounts for compositional heterogeneity between lineages by introducing breakpoints along the branches of the phylogeny at which the amino acid composition is allowed to change, but is computationally expensive. Finally, a neighbor-joining tree based on equal input distances that consider pattern and rate heterogeneity showed several unusual groupings, which are most likely artifacts, probably caused by the loss of information resulting from the transformation of the sequence data into distances. 
As long as no more efficient phylogenetic inference methods are available that can directly account for compositional heterogeneity in large datasets, using methods for reducing compositional heterogeneity in the data in combination with methods that assume a stationary amino acid composition remains an option for controlling systematic errors in tree reconstruction that result from compositional bias. Our analyses indicated that the paraphyly of Deuterostomia in some analyses is the result of systematic errors that also affected the relationships of Entoprocta and Ectoprocta.
L3  - 10.1093/molbev/msq097
JF  - Molecular Biology and Evolution
VL  - 27
IS  - 6
SP  - 1
EP  - 10
ER  - 