% Li, Lu & Orti (2008), Systematic Biology 57(4): 519--539.
% Exported from TreeBASE (reference 16336); cleaned up by hand:
%   - restored the mis-encoded accented surname and quotation marks,
%   - dropped empty keywords/url/pmid fields (they only trigger empty-field warnings),
%   - protected the taxon name in the title against style down-casing,
%   - removed the hard-coded trailing period from the title (the style supplies it).
% NOTE(review): the journal record for this DOI appears to list the first author
% as "Chenhong Li"; the given name below is kept as exported -- confirm before citing.
@article{TreeBASE2Ref16336,
  author   = {Li, Chunjie and Lu, Guoqing and Ort{\'\i}, Guillermo},
  title    = {Optimal data partitioning and a test case for ray-finned fishes ({Actinopterygii}) based on ten nuclear loci},
  journal  = {Systematic Biology},
  year     = {2008},
  volume   = {57},
  number   = {4},
  pages    = {519--539},
  doi      = {10.1080/10635150802206883},
  abstract = {Data partitioning, the combined phylogenetic analysis of homogeneous blocks of data, is a common strategy used to accommodate heterogeneities in complex multilocus data sets. Variation in evolutionary rates and substitution patterns among sites are typically addressed by partitioning data by gene, codon position, or both. Excessive partitioning of the data, however, could lead to overparameterization, therefore it seems reasonable to define smaller numbers of partitions to improve the fit of the model. We propose a new method, based on cluster analysis, to find an optimal partitioning strategy for multilocus protein-coding data sets. A heuristic exploration of alternative partitioning schemes, based on Bayesian and maximum-likelihood criteria is shown here to produce an optimal number of partitions. We tested this method using sequence data of ten nuclear genes collected from 52 ray-finned fish (Actinopterygii) and four tetrapods. The concatenated sequences included 7995 nucleotide sites split into 30 partitions defined a priori based on gene and codon position. Our results show that a model based on only 10 partitions defined by cluster analysis performed better than partitioning by both gene and codon position. Alternative data partitioning schemes also are shown to affect the topologies resulting from phylogenetic analysis, especially when Bayesian methods are used, suggesting that over-partitioning may be of major concern when Bayesian approach is used. The phylogenetic relationships among the major clades of ray-finned fish were assessed using the best data partitioning schemes under ML and Bayesian methods. Some significant results include the monophyly of ``Holostei'' (Amia and Lepisosteus), the sister-group relationships between (1) esociforms and salmoniforms, (2) osmeriforms and stomiiforms, the polyphyly of Perciformes, and a close relationship of cichlids and atherinomorphs.},
}
Citation for Study 2045
Citation title:
"Optimal data partitioning and a test case for ray-finned fishes (Actinopterygii) based on ten nuclear loci.".
This study was previously identified under the legacy study ID S2044
(Status: Published).
Citation
Li C., Lu G., & Ortí G. 2008. Optimal data partitioning and a test case for ray-finned fishes (Actinopterygii) based on ten nuclear loci. Systematic Biology, 57(4): 519-539.
Authors
Abstract
Data partitioning, the combined phylogenetic analysis of homogeneous blocks of data, is a common strategy used to accommodate heterogeneities in complex multilocus data sets. Variation in evolutionary rates and substitution patterns among sites are typically addressed by partitioning data by gene, codon position, or both. Excessive partitioning of the data, however, could lead to overparameterization, therefore it seems reasonable to define smaller numbers of partitions to improve the fit of the model. We propose a new method, based on cluster analysis, to find an optimal partitioning strategy for multilocus protein-coding data sets. A heuristic exploration of alternative partitioning schemes, based on Bayesian and maximum-likelihood criteria is shown here to produce an optimal number of partitions. We tested this method using sequence data of ten nuclear genes collected from 52 ray-finned fish (Actinopterygii) and four tetrapods. The concatenated sequences included 7995 nucleotide sites split into 30 partitions defined a priori based on gene and codon position. Our results show that a model based on only 10 partitions defined by cluster analysis performed better than partitioning by both gene and codon position. Alternative data partitioning schemes also are shown to affect the topologies resulting from phylogenetic analysis, especially when Bayesian methods are used, suggesting that over-partitioning may be of major concern when Bayesian approach is used. The phylogenetic relationships among the major clades of ray-finned fish were assessed using the best data partitioning schemes under ML and Bayesian methods. Some significant results include the monophyly of "Holostei" (Amia and Lepisosteus), the sister-group relationships between (1) esociforms and salmoniforms, (2) osmeriforms and stomiiforms, the polyphyly of Perciformes, and a close relationship of cichlids and atherinomorphs.
External links
About this resource
- Canonical resource URI:
http://purl.org/phylo/treebase/phylows/study/TB2:S2045
- Other versions:
Nexus
NeXML
- Show BibTeX reference
% Li, Lu & Orti (2008), Systematic Biology 57(4): 519--539.
% Exported from TreeBASE (reference 16336); cleaned up by hand:
%   - restored the mis-encoded accented surname and quotation marks,
%   - dropped empty keywords/url/pmid fields (they only trigger empty-field warnings),
%   - protected the taxon name in the title against style down-casing,
%   - removed the hard-coded trailing period from the title (the style supplies it).
% NOTE(review): the journal record for this DOI appears to list the first author
% as "Chenhong Li"; the given name below is kept as exported -- confirm before citing.
@article{TreeBASE2Ref16336,
  author   = {Li, Chunjie and Lu, Guoqing and Ort{\'\i}, Guillermo},
  title    = {Optimal data partitioning and a test case for ray-finned fishes ({Actinopterygii}) based on ten nuclear loci},
  journal  = {Systematic Biology},
  year     = {2008},
  volume   = {57},
  number   = {4},
  pages    = {519--539},
  doi      = {10.1080/10635150802206883},
  abstract = {Data partitioning, the combined phylogenetic analysis of homogeneous blocks of data, is a common strategy used to accommodate heterogeneities in complex multilocus data sets. Variation in evolutionary rates and substitution patterns among sites are typically addressed by partitioning data by gene, codon position, or both. Excessive partitioning of the data, however, could lead to overparameterization, therefore it seems reasonable to define smaller numbers of partitions to improve the fit of the model. We propose a new method, based on cluster analysis, to find an optimal partitioning strategy for multilocus protein-coding data sets. A heuristic exploration of alternative partitioning schemes, based on Bayesian and maximum-likelihood criteria is shown here to produce an optimal number of partitions. We tested this method using sequence data of ten nuclear genes collected from 52 ray-finned fish (Actinopterygii) and four tetrapods. The concatenated sequences included 7995 nucleotide sites split into 30 partitions defined a priori based on gene and codon position. Our results show that a model based on only 10 partitions defined by cluster analysis performed better than partitioning by both gene and codon position. Alternative data partitioning schemes also are shown to affect the topologies resulting from phylogenetic analysis, especially when Bayesian methods are used, suggesting that over-partitioning may be of major concern when Bayesian approach is used. The phylogenetic relationships among the major clades of ray-finned fish were assessed using the best data partitioning schemes under ML and Bayesian methods. Some significant results include the monophyly of ``Holostei'' (Amia and Lepisosteus), the sister-group relationships between (1) esociforms and salmoniforms, (2) osmeriforms and stomiiforms, the polyphyly of Perciformes, and a close relationship of cichlids and atherinomorphs.},
}
- Show RIS reference
TY - JOUR
ID - 16336
AU - Li,Chunjie
AU - Lu,Guoqing
AU - Ortí,Guillermo
T1 - Optimal data partitioning and a test case for ray-finned fishes (Actinopterygii) based on ten nuclear loci.
PY - 2008
KW -
UR - http://dx.doi.org/10.1080/10635150802206883
N2 - Data partitioning, the combined phylogenetic analysis of homogeneous blocks of data, is a common strategy used to accommodate heterogeneities in complex multilocus data sets. Variation in evolutionary rates and substitution patterns among sites are typically addressed by partitioning data by gene, codon position, or both. Excessive partitioning of the data, however, could lead to overparameterization, therefore it seems reasonable to define smaller numbers of partitions to improve the fit of the model. We propose a new method, based on cluster analysis, to find an optimal partitioning strategy for multilocus protein-coding data sets. A heuristic exploration of alternative partitioning schemes, based on Bayesian and maximum-likelihood criteria is shown here to produce an optimal number of partitions. We tested this method using sequence data of ten nuclear genes collected from 52 ray-finned fish (Actinopterygii) and four tetrapods. The concatenated sequences included 7995 nucleotide sites split into 30 partitions defined a priori based on gene and codon position. Our results show that a model based on only 10 partitions defined by cluster analysis performed better than partitioning by both gene and codon position. Alternative data partitioning schemes also are shown to affect the topologies resulting from phylogenetic analysis, especially when Bayesian methods are used, suggesting that over-partitioning may be of major concern when Bayesian approach is used. The phylogenetic relationships among the major clades of ray-finned fish were assessed using the best data partitioning schemes under ML and Bayesian methods. Some significant results include the monophyly of "Holostei" (Amia and Lepisosteus), the sister-group relationships between (1) esociforms and salmoniforms, (2) osmeriforms and stomiiforms, the polyphyly of Perciformes, and a close relationship of cichlids and atherinomorphs.
L3 - 10.1080/10635150802206883
JF - Systematic Biology
VL - 57
IS - 4
SP - 519
EP - 539
ER -