% Journal article cited by TreeBASE study 11647; the citation key is kept exactly as exported.
% The empty doi/pmid/volume/number/pages placeholders were dropped because empty
% fields trigger BibTeX/Biber warnings. Add them back once the values are known.
@article{TreeBASE2Ref20067,
  author   = {Collins, R. Eric and Higgs, Paul},
  title    = {Testing the Infinitely Many Genes Model for the Evolution of the Bacterial Core Genome and Pangenome},
  journal  = {Molecular Biology and Evolution},
  year     = {2012},
  url      = {http://reric.org/files/pubs/Collins_Higgs_2012-Pangenome.pdf},
  keywords = {Bacteria, Genome, Pangenome, Evolution},
  abstract = {When groups of related bacterial genomes are compared, the number of core genes found in all genomes is usually much less than the mean genome size, while the size of the pangenome (the set of genes found on at least one of the genomes) is much larger than the mean size of one genome. We analyze 172 complete genomes of Bacilli and compare the properties of the pangenomes and core genomes of monophyletic subsets taken from this group. We then assess the capabilities of several evolutionary models to predict these properties. The infinitely many genes (IMG) model is based on the assumption that each new gene can arise only once. The predictions of the model depend on the shape of the evolutionary tree that underlies the divergence of the genomes. We calculate results for coalescent trees, star trees, and arbitrary phylogenetic trees of predefined fixed branch length. On a star tree, the pangenome size increases linearly with the number of genomes, as has been suggested in some previous studies, whereas on a coalescent tree, it increases logarithmically. The coalescent tree gives a better fit to the data, for all the examples we consider. In some cases, a fixed phylogenetic tree proved better than the coalescent tree at reproducing structure in the gene frequency spectrum, but little improvement was gained in predictions of the core and pangenome sizes. Most of the data are well explained by a model with three classes of gene: an essential class that is found in all genomes, a slow class, whose rate of origination and deletion is slow compared to the time of divergence of the genomes, and a fast class showing rapid origination and deletion. Although the majority of genes originating in a genome are in the fast class, these genes are not retained for long periods, and the majority of genes present in a genome are in the slow or essential classes.
In general, we show that the IMG model is useful for comparison with experimental genome data both for species-level and widely-divergent taxonomic groups. Software implementing the described formulae is provided at http://github.com/rec3141/pangenome.},
}
Citation for Study 11647
Citation title:
"Testing the Infinitely Many Genes Model for the Evolution of the Bacterial Core Genome and Pangenome".
Study name:
"Testing the Infinitely Many Genes Model for the Evolution of the Bacterial Core Genome and Pangenome".
This study is part of submission 11637
(Status: Published).
Citation
Collins R.E., & Higgs P. 2012. Testing the Infinitely Many Genes Model for the Evolution of the Bacterial Core Genome and Pangenome. Molecular Biology and Evolution.
Authors
-
Collins R.E.
(submitter)
-
Higgs P.
Abstract
When groups of related bacterial genomes are compared, the number of core genes found in all genomes is usually much less than the mean genome size, while the size of the pangenome (the set of genes found on at least one of the genomes) is much larger than the mean size of one genome. We analyze 172 complete genomes of Bacilli and compare the properties of the pangenomes and core genomes of monophyletic subsets taken from this group. We then assess the capabilities of several evolutionary models to predict these properties. The infinitely many genes (IMG) model is based on the assumption that each new gene can arise only once. The predictions of the model depend on the shape of the evolutionary tree that underlies the divergence of the genomes. We calculate results for coalescent trees, star trees, and arbitrary phylogenetic trees of predefined fixed branch length. On a star tree, the pangenome size increases linearly with the number of genomes, as has been suggested in some previous studies, whereas on a coalescent tree, it increases logarithmically. The coalescent tree gives a better fit to the data, for all the examples we consider. In some cases, a fixed phylogenetic tree proved better than the coalescent tree at reproducing structure in the gene frequency spectrum, but little improvement was gained in predictions of the core and pangenome sizes. Most of the data are well explained by a model with three classes of gene: an essential class that is found in all genomes, a slow class, whose rate of origination and deletion is slow compared to the time of divergence of the genomes, and a fast class showing rapid origination and deletion. Although the majority of genes originating in a genome are in the fast class, these genes are not retained for long periods, and the majority of genes present in a genome are in the slow or essential classes. 
In general, we show that the IMG model is useful for comparison with experimental genome data both for species-level and widely-divergent taxonomic groups. Software implementing the described formulae is provided at http://github.com/rec3141/pangenome.
Keywords
Bacteria, Genome, Pangenome, Evolution
External links
About this resource
- Canonical resource URI:
http://purl.org/phylo/treebase/phylows/study/TB2:S11647
- Other versions:
Nexus
NeXML
- Show BibTeX reference
% Journal article cited by TreeBASE study 11647; the citation key is kept exactly as exported.
% The empty doi/pmid/volume/number/pages placeholders were dropped because empty
% fields trigger BibTeX/Biber warnings. Add them back once the values are known.
@article{TreeBASE2Ref20067,
  author   = {Collins, R. Eric and Higgs, Paul},
  title    = {Testing the Infinitely Many Genes Model for the Evolution of the Bacterial Core Genome and Pangenome},
  journal  = {Molecular Biology and Evolution},
  year     = {2012},
  url      = {http://reric.org/files/pubs/Collins_Higgs_2012-Pangenome.pdf},
  keywords = {Bacteria, Genome, Pangenome, Evolution},
  abstract = {When groups of related bacterial genomes are compared, the number of core genes found in all genomes is usually much less than the mean genome size, while the size of the pangenome (the set of genes found on at least one of the genomes) is much larger than the mean size of one genome. We analyze 172 complete genomes of Bacilli and compare the properties of the pangenomes and core genomes of monophyletic subsets taken from this group. We then assess the capabilities of several evolutionary models to predict these properties. The infinitely many genes (IMG) model is based on the assumption that each new gene can arise only once. The predictions of the model depend on the shape of the evolutionary tree that underlies the divergence of the genomes. We calculate results for coalescent trees, star trees, and arbitrary phylogenetic trees of predefined fixed branch length. On a star tree, the pangenome size increases linearly with the number of genomes, as has been suggested in some previous studies, whereas on a coalescent tree, it increases logarithmically. The coalescent tree gives a better fit to the data, for all the examples we consider. In some cases, a fixed phylogenetic tree proved better than the coalescent tree at reproducing structure in the gene frequency spectrum, but little improvement was gained in predictions of the core and pangenome sizes. Most of the data are well explained by a model with three classes of gene: an essential class that is found in all genomes, a slow class, whose rate of origination and deletion is slow compared to the time of divergence of the genomes, and a fast class showing rapid origination and deletion. Although the majority of genes originating in a genome are in the fast class, these genes are not retained for long periods, and the majority of genes present in a genome are in the slow or essential classes.
In general, we show that the IMG model is useful for comparison with experimental genome data both for species-level and widely-divergent taxonomic groups. Software implementing the described formulae is provided at http://github.com/rec3141/pangenome.},
}
- Show RIS reference
TY - JOUR
ID - 20067
AU - Collins,R Eric
AU - Higgs,Paul
T1 - Testing the Infinitely Many Genes Model for the Evolution of the Bacterial Core Genome and Pangenome
PY - 2012
KW - Bacteria
KW - Genome
KW - Pangenome
KW - Evolution
UR - http://reric.org/files/pubs/Collins_Higgs_2012-Pangenome.pdf
N2 - When groups of related bacterial genomes are compared, the number of core genes found in all genomes is usually much less than the mean genome size, while the size of the pangenome (the set of genes found on at least one of the genomes) is much larger than the mean size of one genome. We analyze 172 complete genomes of Bacilli and compare the properties of the pangenomes and core genomes of monophyletic subsets taken from this group. We then assess the capabilities of several evolutionary models to predict these properties. The infinitely many genes (IMG) model is based on the assumption that each new gene can arise only once. The predictions of the model depend on the shape of the evolutionary tree that underlies the divergence of the genomes. We calculate results for coalescent trees, star trees, and arbitrary phylogenetic trees of predefined fixed branch length. On a star tree, the pangenome size increases linearly with the number of genomes, as has been suggested in some previous studies, whereas on a coalescent tree, it increases logarithmically. The coalescent tree gives a better fit to the data, for all the examples we consider. In some cases, a fixed phylogenetic tree proved better than the coalescent tree at reproducing structure in the gene frequency spectrum, but little improvement was gained in predictions of the core and pangenome sizes. Most of the data are well explained by a model with three classes of gene: an essential class that is found in all genomes, a slow class, whose rate of origination and deletion is slow compared to the time of divergence of the genomes, and a fast class showing rapid origination and deletion. Although the majority of genes originating in a genome are in the fast class, these genes are not retained for long periods, and the majority of genes present in a genome are in the slow or essential classes. 
In general, we show that the IMG model is useful for comparison with experimental genome data both for species-level and widely-divergent taxonomic groups. Software implementing the described formulae is provided at http://github.com/rec3141/pangenome.
L3 -
JF - Molecular Biology and Evolution
VL -
IS -
ER -