2025
Hölzer, Martin; Reuschel, Charlotte; Vorimore, Fabien; Laroucau, Karine; Sachse, Konrad
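Exploring the genomic landscape of Chlamydiifrater species: novel features include multiple truncated major outer membrane proteins, unique genes and chlamydial plasticity zone orthologs Journal Article
In: Access Microbiology, vol. 7, 2025.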
Abstract | Links | BibTeX | Tags: annotation, bacteria, DNA / genomics, evolution, pregnancy
@article{nokey_80,
  title = {Exploring the genomic landscape of {Chlamydiifrater} species: novel features include multiple truncated major outer membrane proteins, unique genes and chlamydial plasticity zone orthologs},
  author = {Martin Hölzer and Charlotte Reuschel and Fabien Vorimore and Karine Laroucau and Konrad Sachse},
  doi = {10.1099/acmi.0.000936.v3},
  year = {2025},
  date = {2025-02-03},
  urldate = {2025-02-03},
  journal = {Access Microbiology},
  volume = {7},
  abstract = {Recently discovered obligate intracellular bacteria belonging to the genus Chlamydiifrater with the species of Chlamydiifrater phoenicopteri and Chlamydiifrater volucris were studied to explore the composition of their genomes and their relatedness to Chlamydia, the other genus of the family Chlamydiaceae. We investigated 4 isolates of Cf. volucris, 2 of them newly sequenced, and one of Cf. phoenicopteri alongside 12 representatives of the Chlamydia species. Our study uncovers previously unrecognized genomic structures within Chlamydiifrater using a hybrid sequencing approach and advanced annotation pipelines, providing insights into species-specific adaptations and evolutionary dynamics. The integration of long-read sequencing data, comprehensive re-annotation strategies and pan-genomics enabled the localization of the unique plasticity zone and the identification of novel gene clusters in Chlamydiifrater strains, which improves our understanding of chlamydial genome architecture and plasticity in the family Chlamydiaceae. Our analysis revealed that 761 CDS ({\textasciitilde}80\%) are shared among members of both genera. We further identified 158 unique genes of Chlamydiifrater species, but their annotation remains challenging because of the absence of functionally annotated orthologs in public databases. A full-length ompA gene encoding the major outer membrane porin was seen in all Chlamydiifrater strains. We also describe the localization and structure of multiple truncated CDS of ompA family members, representing one of this study’s most interesting findings. While genome analysis of Chlamydiifrater spp. confirmed numerous common features shared with representatives of the genus Chlamydia, many unique genomic elements were identified that underpin the distinct phenotype and separate genetic position of these new microorganisms.},
  keywords = {annotation, bacteria, DNA / genomics, evolution, pregnancy},
  pubstate = {published},
  tppubtype = {article}
}
2024
Lamkiewicz, Kevin; Barf, Lisa-Marie; Sachse, Konrad; Hölzer, Martin
RIBAP: a comprehensive bacterial core genome annotation pipeline for pangenome calculation beyond the species level Journal Article
In: Genome Biology, vol. 25, iss. 1, 2024.
Abstract | Links | BibTeX | Tags: annotation, bacteria, DNA / genomics, evolution, software
@article{nokey_63,
  title = {{RIBAP}: a comprehensive bacterial core genome annotation pipeline for pangenome calculation beyond the species level},
  author = {Kevin Lamkiewicz and Lisa-Marie Barf and Konrad Sachse and Martin Hölzer},
  doi = {10.1186/s13059-024-03312-9},
  year = {2024},
  date = {2024-07-01},
  journal = {Genome Biology},
  volume = {25},
  number = {1},
  abstract = {Microbial pangenome analysis identifies present or absent genes in prokaryotic genomes. However, current tools are limited when analyzing species with higher sequence diversity or higher taxonomic orders such as genera or families. The Roary ILP Bacterial core Annotation Pipeline (RIBAP) uses an integer linear programming approach to refine gene clusters predicted by Roary for identifying core genes. RIBAP successfully handles the complexity and diversity of Chlamydia, Klebsiella, Brucella, and Enterococcus genomes, outperforming other established and recent pangenome tools for identifying all-encompassing core genes at the genus level. RIBAP is a freely available Nextflow pipeline at github.com/hoelzer-lab/ribap and zenodo.org/doi/10.5281/zenodo.10890871.},
  keywords = {annotation, bacteria, DNA / genomics, evolution, software},
  pubstate = {published},
  tppubtype = {article}
}
2023
Hufsky, Franziska; Abecasis, Ana B.; Babaian, Artem; Beck, Sebastian; Brierley, Liam; Dellicour, Simon; Eggeling, Christian; Elena, Santiago F.; Gieraths, Udo; Ha, Anh D.; Harvey, Will; Jones, Terry C.; Lamkiewicz, Kevin; Lovate, Gabriel L.; Lücking, Dominik; Machyna, Martin; Nishimura, Luca; Nocke, Maximilian K.; Renard, Bernard Y.; Sakaguchi, Shoichi; Sakellaridi, Lygeri; Spangenberg, Jannes; Tarradas-Alemany, Maria; Triebel, Sandra; Vakulenko, Yulia; Wijesekara, Rajitha Yasas; González-Candelas, Fernando; Krautwurst, Sarah; Pérez-Cataluña, Alba; Randazzo, Walter; Sánchez, Gloria; Marz, Manja
The International Virus Bioinformatics Meeting 2023 Journal Article
In: Viruses, vol. 15, iss. 10, 2023, ISSN: 1999-4915.
Abstract | Links | BibTeX | Tags: annotation, software, virus host interaction, viruses
@article{nokey_47,
  title = {The {International Virus Bioinformatics Meeting} 2023},
  author = {Franziska Hufsky and Ana B. Abecasis and Artem Babaian and Sebastian Beck and Liam Brierley and Simon Dellicour and Christian Eggeling and Santiago F. Elena and Udo Gieraths and Anh D. Ha and Will Harvey and Terry C. Jones and Kevin Lamkiewicz and Gabriel L. Lovate and Dominik Lücking and Martin Machyna and Luca Nishimura and Maximilian K. Nocke and Bernard Y. Renard and Shoichi Sakaguchi and Lygeri Sakellaridi and Jannes Spangenberg and Maria Tarradas-Alemany and Sandra Triebel and Yulia Vakulenko and Rajitha Yasas Wijesekara and Fernando González-Candelas and Sarah Krautwurst and Alba Pérez-Cataluña and Walter Randazzo and Gloria Sánchez and Manja Marz},
  doi = {10.3390/v15102031},
  issn = {1999-4915},
  year = {2023},
  date = {2023-09-30},
  urldate = {2023-09-30},
  journal = {Viruses},
  volume = {15},
  number = {10},
  abstract = {The 2023 International Virus Bioinformatics Meeting was held in Valencia, Spain, from 24–26 May 2023, attracting approximately 180 participants worldwide. The primary objective of the conference was to establish a dynamic scientific environment conducive to discussion, collaboration, and the generation of novel research ideas. As the first in-person event following the SARS-CoV-2 pandemic, the meeting facilitated highly interactive exchanges among attendees. It served as a pivotal gathering for gaining insights into the current status of virus bioinformatics research and engaging with leading researchers and emerging scientists. The event comprised eight invited talks, 19 contributed talks, and 74 poster presentations across eleven sessions spanning three days. Topics covered included machine learning, bacteriophages, virus discovery, virus classification, virus visualization, viral infection, viromics, molecular epidemiology, phylodynamic analysis, RNA viruses, viral sequence analysis, viral surveillance, and metagenomics. This report provides rewritten abstracts of the presentations, a summary of the key research findings, and highlights shared during the meeting.},
  keywords = {annotation, software, virus host interaction, viruses},
  pubstate = {published},
  tppubtype = {article}
}
Rangel-Pineros, Guillermo; Almeida, Alexandre; Beracochea, Martin; Sakharova, Ekaterina; Marz, Manja; Reyes Muñoz, Alejandro; Hölzer, Martin; Finn, Robert D.
VIRify: An integrated detection, annotation and taxonomic classification pipeline using virus-specific protein profile hidden Markov models Journal Article
In: PLOS Comput Biol, vol. 19, iss. 8, pp. e1011422, 2023.
Abstract | Links | BibTeX | Tags: annotation, classification, metagenomics, phylogenetics, software, viruses
@article{nokey,
  title = {{VIRify}: An integrated detection, annotation and taxonomic classification pipeline using virus-specific protein profile hidden {Markov} models},
  author = {Guillermo Rangel-Pineros and Alexandre Almeida and Martin Beracochea and Ekaterina Sakharova and Manja Marz and Reyes Muñoz, Alejandro and Martin Hölzer and Robert D. Finn},
  doi = {10.1371/journal.pcbi.1011422},
  year = {2023},
  date = {2023-08-28},
  journal = {PLOS Comput Biol},
  volume = {19},
  number = {8},
  pages = {e1011422},
  abstract = {The study of viral communities has revealed the enormous diversity and impact these biological entities have on various ecosystems. These observations have sparked widespread interest in developing computational strategies that support the comprehensive characterisation of viral communities based on sequencing data. Here we introduce VIRify, a new computational pipeline designed to provide a user-friendly and accurate functional and taxonomic characterisation of viral communities. VIRify identifies viral contigs and prophages from metagenomic assemblies and annotates them using a collection of viral profile hidden Markov models (HMMs). These include our manually-curated profile HMMs, which serve as specific taxonomic markers for a wide range of prokaryotic and eukaryotic viral taxa and are thus used to reliably classify viral contigs. We tested VIRify on assemblies from two microbial mock communities, a large metagenomics study, and a collection of publicly available viral genomic sequences from the human gut. The results showed that VIRify could identify sequences from both prokaryotic and eukaryotic viruses, and provided taxonomic classifications from the genus to the family rank with an average accuracy of 86.6\%. In addition, VIRify allowed the detection and taxonomic classification of a range of prokaryotic and eukaryotic viruses present in 243 marine metagenomic assemblies. Finally, the use of VIRify led to a large expansion in the number of taxonomically classified human gut viral sequences and the improvement of outdated and shallow taxonomic classifications. Overall, we demonstrate that VIRify is a novel and powerful resource that offers an enhanced capability to detect a broad range of viral contigs and taxonomically classify them.},
  keywords = {annotation, classification, metagenomics, phylogenetics, software, viruses},
  pubstate = {published},
  tppubtype = {article}
}
2022
Hufsky, Franziska; Beslic, Denis; Boeckaerts, Dimitri; Duchene, Sebastian; González-Tortuero, Enrique; Gruber, Andreas J; Guo, Jiarong; Jansen, Daan; Juma, John; Kongkitimanon, Kunaphas; Luque, Antoni; Ritsch, Muriel; Lovate, Gabriel L.; Nishimura, Luca; Pas, Célia; Domingo, Esteban; Hodcroft, Emma; Lemey, Philippe; Sullivan, Matthew B; Weber, Friedemann; González-Candelas, Fernando; Krautwurst, Sarah; Pérez-Cataluña, Alba; Randazzo, Walter; Sánchez, Gloria; Marz, Manja
The International Virus Bioinformatics Meeting 2022 Journal Article
In: Viruses, vol. 14, iss. 5, pp. 973, 2022.
Abstract | Links | BibTeX | Tags: annotation, software, virus host interaction, viruses
@article{Hufsky2022,
  title = {The {International Virus Bioinformatics Meeting} 2022},
  author = {Franziska Hufsky and Denis Beslic and Dimitri Boeckaerts and Sebastian Duchene and Enrique González-Tortuero and Andreas J Gruber and Jiarong Guo and Daan Jansen and John Juma and Kunaphas Kongkitimanon and Antoni Luque and Muriel Ritsch and Gabriel L. Lovate and Luca Nishimura and Célia Pas and Esteban Domingo and Emma Hodcroft and Philippe Lemey and Matthew B Sullivan and Friedemann Weber and Fernando González-Candelas and Sarah Krautwurst and Alba Pérez-Cataluña and Walter Randazzo and Gloria Sánchez and Manja Marz},
  doi = {10.3390/v14050973},
  year = {2022},
  date = {2022-05-05},
  urldate = {2022-05-05},
  journal = {Viruses},
  volume = {14},
  number = {5},
  pages = {973},
  abstract = {The International Virus Bioinformatics Meeting 2022 took place online, on 23-25 March 2022, and has attracted about 380 participants from all over the world. The goal of the meeting was to provide a meaningful and interactive scientific environment to promote discussion and collaboration and to inspire and suggest new research directions and questions. The participants created a highly interactive scientific environment even without physical face-to-face interactions. This meeting is a focal point to gain an insight into the state-of-the-art of the virus bioinformatics research landscape and to interact with researchers in the forefront as well as aspiring young scientists. The meeting featured eight invited and 18 contributed talks in eight sessions on three days, as well as 52 posters, which were presented during three virtual poster sessions. The main topics were: SARS-CoV-2, viral emergence and surveillance, virus-host interactions, viral sequence analysis, virus identification and annotation, phages, and viral diversity. This report summarizes the main research findings and highlights presented at the meeting.},
  keywords = {annotation, software, virus host interaction, viruses},
  pubstate = {published},
  tppubtype = {article}
}
2021
Martín-Hernández, Giselle C; Müller, Bettina; Chmielarz, Mikołaj; Brandt, Christian; Hölzer, Martin; Viehweger, Adrian; Passoth, Volkmar
Chromosome-level genome assembly and transcriptome-based annotation of the oleaginous yeast Rhodotorula toruloides CBS 14 Journal Article
In: Genomics, vol. 113, no. 6, pp. 4022–4027, 2021.
Abstract | Links | BibTeX | Tags: annotation, assembly, DNA / genomics, fungi, nanopore
@article{Martín-Hernández2021,
  title = {Chromosome-level genome assembly and transcriptome-based annotation of the oleaginous yeast {Rhodotorula toruloides CBS 14}},
  author = {Giselle C Martín-Hernández and Bettina Müller and Mikołaj Chmielarz and Christian Brandt and Martin Hölzer and Adrian Viehweger and Volkmar Passoth},
  doi = {10.1016/j.ygeno.2021.10.006},
  year = {2021},
  date = {2021-10-11},
  urldate = {2021-10-11},
  journal = {Genomics},
  volume = {113},
  number = {6},
  pages = {4022--4027},
  abstract = {Rhodotorula toruloides is an oleaginous yeast with high biotechnological potential. In order to understand the molecular physiology of lipid synthesis in R. toruloides and to advance metabolic engineering, a high-resolution genome is required. We constructed a genome draft of R. toruloides CBS 14, using a hybrid assembly approach, consisting of short and long reads generated by Illumina and Nanopore sequencing, respectively. The genome draft consists of 23 contigs and 3 scaffolds, with a N50 length of 1,529,952 bp, thus largely representing chromosomal organization. The total size of the genome is 20,534,857 bp and the overall GC content is 61.83\%. Transcriptomic data from different growth conditions was used to aid species-specific gene annotation. We annotated 9464 genes and identified 11,691 transcripts. Furthermore, we demonstrated the presence of a potential plasmid, an extrachromosomal circular structure of about 11 kb with a copy number about three times as high as the other chromosomes.},
  keywords = {annotation, assembly, DNA / genomics, fungi, nanopore},
  pubstate = {published},
  tppubtype = {article}
}
Mostajo, Nelly F.
Reston and Zaire ebolavirus life cycle and host cellular response PhD Thesis
2021.
Abstract | Links | BibTeX | Tags: annotation, differential expression analysis, virus host interaction, viruses
@phdthesis{Mostajo:21:phd,
  title = {{Reston} and {Zaire} ebolavirus life cycle and host cellular response},
  author = {Nelly F. Mostajo},
  doi = {10.22032/dbt.49230},
  year = {2021},
  date = {2021-04-14},
  urldate = {2021-04-14},
  abstract = {Ebolaviruses are negative strand RNA viruses which are known to cause Ebola virus disease (EVD) with a fatal outcome in primates. All five species of Ebolavirus can infect humans, but only four lead to EVD. The Ebolavirus with the most provoked outbreaks and highest fatality rate (above 80\%) is Zaire ebolavirus (EBOV), while the one without any provoke symptoms in humans is Reston ebolavirus (RESTV). In order to determine the features which lead to the different outcomes from EBOV and RESTV the cellular response against these viruses, and the divergence between RESTV and EBOV life cycle inside human cells was investigated. To study the cellular response RNA of two human cell lines (HuH7 and THP1) infected with RESTV, EBOV and uninfected (Mock) at two different time points was analyzed. Using whole transcriptome screening with smallRNAseq, Microarray, de novo annotation and expression profiles it was possible to elucidate that the cellular response against RESTV and EBOV infection differs the most at 3 h p.i., this was consistent in HuH7 and THP1 cell lines. The transcriptomic study showed RESTV and EBOV stimulate a distinct set of genes related to cellular entry. Also, the transcriptomic data suggests EBOV transcribes and replicates faster than RESTV, supported by cellular components like snoRNAs, while RESTV is similar to Mock in this aspect. This finding was backed with an entry assay which showed EBOV releases its content into the cytosol faster than RESTV, pointing to differences in entry pathway or a better time controlled response from the cell against RESTV. To understand the life cycle of RESTV and EBOV in human cells transcription/replication, inclusion bodies, nucleocapsid (NC) transport, viral particle formation, and infection was studied. Selected genes which were differentially expressed between RESTV and EBOV infected cells were further analyzed on the virus life cycle context.},
  school = {Friedrich-Schiller-Universität Jena},
  keywords = {annotation, differential expression analysis, virus host interaction, viruses},
  pubstate = {published},
  tppubtype = {phdthesis}
}
Van Damme, Renaud; Hölzer, Martin; Viehweger, Adrian; Müller, Bettina; Bongcam-Rudloff, Erik; Brandt, Christian
Metagenomics workflow for hybrid assembly, differential coverage binning, metatranscriptomics and pathway analysis (MUFFIN) Journal Article
In: PLOS Comput Biol, vol. 17, no. 2, pp. e1008716, 2021.
Abstract | Links | BibTeX | Tags: annotation, assembly, classification, DNA / genomics, metagenomics, RNA / transcriptomics, software
@article{VanDamme:21,
  title = {Metagenomics workflow for hybrid assembly, differential coverage binning, metatranscriptomics and pathway analysis ({MUFFIN})},
  author = {Van Damme, Renaud and Martin Hölzer and Adrian Viehweger and Bettina Müller and Erik Bongcam-Rudloff and Christian Brandt},
  editor = {Mihaela Pertea},
  url = {https://github.com/RVanDamme/MUFFIN},
  doi = {10.1371/journal.pcbi.1008716},
  year = {2021},
  date = {2021-02-09},
  urldate = {2021-02-09},
  journal = {PLOS Comput Biol},
  volume = {17},
  number = {2},
  pages = {e1008716},
  publisher = {Public Library of Science (PLoS)},
  abstract = {Metagenomics has redefined many areas of microbiology. However, metagenome-assembled genomes (MAGs) are often fragmented, primarily when sequencing was performed with short reads. Recent long-read sequencing technologies promise to improve genome reconstruction. However, the integration of two different sequencing modalities makes downstream analyses complex. We, therefore, developed MUFFIN, a complete metagenomic workflow that uses short and long reads to produce high-quality bins and their annotations. The workflow is written by using Nextflow, a workflow orchestration software, to achieve high reproducibility and fast and straightforward use. This workflow also produces the taxonomic classification and KEGG pathways of the bins and can be further used for quantification and annotation by providing RNA-Seq data (optionally). We tested the workflow using twenty biogas reactor samples and assessed the capacity of MUFFIN to process and output relevant files needed to analyze the microbial community and their function. MUFFIN produces functional pathway predictions and, if provided de novo metatranscript annotations across the metagenomic sample and for each bin. MUFFIN is available on github under GNUv3 licence: https://github.com/RVanDamme/MUFFIN.},
  keywords = {annotation, assembly, classification, DNA / genomics, metagenomics, RNA / transcriptomics, software},
  pubstate = {published},
  tppubtype = {article}
}
2020
Kalvari, Ioanna; Nawrocki, Eric P; Ontiveros-Palacios, Nancy; Argasinska, Joanna; Lamkiewicz, Kevin; Marz, Manja; Griffiths-Jones, Sam; Toffano-Nioche, Claire; Gautheret, Daniel; Weinberg, Zasha; Rivas, Elena; Eddy, Sean R; Finn, Robert D; Bateman, Alex; Petrov, Anton I
Rfam 14: expanded coverage of metagenomic, viral and microRNA families Journal Article
In: Nucleic Acids Res, vol. 49, no. D1, pp. D192–D200, 2020.
Abstract | Links | BibTeX | Tags: alignment, annotation, bacteria, coronavirus, database, metagenomics, ncRNAs, RNA / transcriptomics, software, viruses
@article{Kalvari:21,
  title = {{Rfam} 14: expanded coverage of metagenomic, viral and {microRNA} families},
  author = {Ioanna Kalvari and Eric P Nawrocki and Nancy Ontiveros-Palacios and Joanna Argasinska and Kevin Lamkiewicz and Manja Marz and Sam Griffiths-Jones and Claire Toffano-Nioche and Daniel Gautheret and Zasha Weinberg and Elena Rivas and Sean R Eddy and Robert D Finn and Alex Bateman and Anton I Petrov},
  url = {https://rfam.org/},
  doi = {10.1093/nar/gkaa1047},
  year = {2020},
  date = {2020-11-19},
  urldate = {2020-11-19},
  journal = {Nucleic Acids Res},
  volume = {49},
  number = {D1},
  pages = {D192--D200},
  publisher = {Oxford University Press (OUP)},
  abstract = {Rfam is a database of RNA families where each of the 3444 families is represented by a multiple sequence alignment of known RNA sequences and a covariance model that can be used to search for additional members of the family. Recent developments have involved expert collaborations to improve the quality and coverage of Rfam data, focusing on microRNAs, viral and bacterial RNAs. We have completed the first phase of synchronising microRNA families in Rfam and miRBase, creating 356 new Rfam families and updating 40. We established a procedure for comprehensive annotation of viral RNA families starting with Flavivirus and Coronaviridae RNAs. We have also increased the coverage of bacterial and metagenome-based RNA families from the ZWD database. These developments have enabled a significant growth of the database, with the addition of 759 new families in Rfam 14. To facilitate further community contribution to Rfam, expert users are now able to build and submit new families using the newly developed Rfam Cloud family curation system. New Rfam website features include a new sequence similarity search powered by RNAcentral, as well as search and visualisation of families with pseudoknots. Rfam is freely available at https://rfam.org.},
  keywords = {alignment, annotation, bacteria, coronavirus, database, metagenomics, ncRNAs, RNA / transcriptomics, software, viruses},
  pubstate = {published},
  tppubtype = {article}
}
Hölzer, Martin; Barf, Lisa-Marie; Lamkiewicz, Kevin; Vorimore, Fabien; Lataretu, Marie; Favaroni, Alison; Schnee, Christiane; Laroucau, Karine; Marz, Manja; Sachse, Konrad
Comparative Genome Analysis of 33 Chlamydia Strains Reveals Characteristic Features of Chlamydia Psittaci and Closely Related Species Journal Article
In: Pathogens, vol. 9, no. 11, pp. 899, 2020.
Abstract | Links | BibTeX | Tags: annotation, bacteria, DNA / genomics, software
@article{Hölzer:20,
  title = {Comparative Genome Analysis of 33 \textit{Chlamydia} Strains Reveals Characteristic Features of \textit{Chlamydia Psittaci} and Closely Related Species},
  author = {Martin Hölzer and Lisa-Marie Barf and Kevin Lamkiewicz and Fabien Vorimore and Marie Lataretu and Alison Favaroni and Christiane Schnee and Karine Laroucau and Manja Marz and Konrad Sachse},
  url = {https://github.com/hoelzer-lab/ribap},
  doi = {10.3390/pathogens9110899},
  year = {2020},
  date = {2020-01-01},
  urldate = {2020-01-01},
  journal = {Pathogens},
  volume = {9},
  number = {11},
  pages = {899},
  publisher = {MDPI AG},
  abstract = {To identify genome-based features characteristic of the avian and human pathogen Chlamydia (C.) psittaci and related chlamydiae, we analyzed whole-genome sequences of 33 strains belonging to 12 species. Using a novel genome analysis tool termed Roary ILP Bacterial Annotation Pipeline (RIBAP), this panel of strains was shown to share a large core genome comprising 784 genes and representing approximately 80\% of individual genomes. Analyzing the most variable genomic sites, we identified a set of features of C. psittaci that in its entirety is characteristic of this species: (i) a relatively short plasticity zone of less than 30,000 nt without a tryptophan operon (also in C. abortus, C. avium, C. gallinacea, C. pneumoniae), (ii) a characteristic set of Inc proteins comprising IncA, B, C, V, X, Y (with homologs in C. abortus, C. caviae and C. felis as closest relatives), (iii) a 502-aa SinC protein, the largest among Chlamydia spp., and (iv) an elevated number of Pmp proteins of subtype G (14 in C. psittaci, 14 in Cand. C. ibidis). In combination with future functional studies, the common and distinctive criteria revealed in this study provide important clues for understanding the complexity of host-specific behavior of individual Chlamydia spp.},
  keywords = {annotation, bacteria, DNA / genomics, software},
  pubstate = {published},
  tppubtype = {article}
}
2019
Mostajo, Nelly F.; Lataretu, Marie; Krautwurst, Sebastian; Mock, Florian; Desirò, Daniel; Lamkiewicz, Kevin; Collatz, Maximilian; Schoen, Andreas; Weber, Friedemann; Marz, Manja; Hölzer, Martin
A comprehensive annotation and differential expression analysis of short and long non-coding RNAs in 16 bat genomes Journal Article
In: NAR Genomics Bioinf, vol. 2, no. 1, pp. lqz006, 2019.
Abstract | Links | BibTeX | Tags: annotation, assembly, differential expression analysis, evolution, ncRNAs, RNA / transcriptomics, virus host interaction, viruses
@article{Mostajo:20,
  title = {A comprehensive annotation and differential expression analysis of short and long non-coding {RNAs} in 16 bat genomes},
  author = {Nelly F. Mostajo and Marie Lataretu and Sebastian Krautwurst and Florian Mock and Daniel Desirò and Kevin Lamkiewicz and Maximilian Collatz and Andreas Schoen and Friedemann Weber and Manja Marz and Martin Hölzer},
  url = {https://www.rna.uni-jena.de/supplements/bats/index.html},
  doi = {10.1093/nargab/lqz006},
  year = {2019},
  date = {2019-09-30},
  urldate = {2019-09-30},
  journal = {NAR Genomics Bioinf},
  volume = {2},
  number = {1},
  pages = {lqz006},
  abstract = {Although bats are increasingly becoming the focus of scientific studies due to their unique properties, these exceptional animals are still among the least studied mammals. Assembly quality and completeness of bat genomes vary a lot and especially non-coding RNA (ncRNA) annotations are incomplete or simply missing. Accordingly, standard bioinformatics pipelines for gene expression analysis often ignore ncRNAs such as microRNAs or long antisense RNAs. The main cause of this problem is the use of incomplete genome annotations. We present a complete screening for ncRNAs within 16 bat genomes. NcRNAs affect a remarkable variety of vital biological functions, including gene expression regulation, RNA processing, RNA interference and, as recently described, regulatory processes in viral infections. Within all investigated bat assemblies, we annotated 667 ncRNA families including 162 snoRNAs and 193 miRNAs as well as rRNAs, tRNAs, several snRNAs and lncRNAs, and other structural ncRNA elements. We validated our ncRNA candidates by six RNA-Seq data sets and show significant expression patterns that have never been described before in a bat species on such a large scale. Our annotations will be usable as a resource (rna.uni-jena.de/supplements/bats) for deeper studying of bat evolution, ncRNAs repertoire, gene expression and regulation, ecology and important host–virus interactions.},
  keywords = {annotation, assembly, differential expression analysis, evolution, ncRNAs, RNA / transcriptomics, virus host interaction, viruses},
  pubstate = {published},
  tppubtype = {article}
}
Riege, Konstantin
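Annotation of non-coding RNAs and examination of Next Generation Sequencing data of pathogenic organisms PhD Thesis
2019.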
Links | BibTeX | Tags: annotation, bacteria, fungi, ncRNAs, RNA / transcriptomics
@phdthesis{Riege:19:phd,
  title = {Annotation of non-coding {RNAs} and examination of {Next Generation Sequencing} data of pathogenic organisms},
  author = {Konstantin Riege},
  url = {https://suche.thulb.uni-jena.de/Record/1067866388},
  year = {2019},
  date = {2019-01-01},
  urldate = {2019-01-01},
  school = {Friedrich-Schiller-Universität Jena},
  keywords = {annotation, bacteria, fungi, ncRNAs, RNA / transcriptomics},
  pubstate = {published},
  tppubtype = {phdthesis}
}
2018
Gerst, Ruman; Hölzer, Martin
PCAGO: An interactive web service to analyze RNA-Seq data with principal component analysis Journal Article
In: bioRxiv, pp. 433078, 2018.
Abstract | Links | BibTeX | Tags: annotation, differential expression analysis, RNA / transcriptomics, software
@article{Gerst:18,
  title = {{PCAGO}: An interactive web service to analyze {RNA-Seq} data with principal component analysis},
  author = {Ruman Gerst and Martin Hölzer},
  url = {https://github.com/rnajena/pcago-unified},
  doi = {10.1101/433078},
  year = {2018},
  date = {2018-10-03},
  urldate = {2018-10-03},
  journal = {bioRxiv},
  pages = {433078},
  publisher = {Cold Spring Harbor Laboratory},
  abstract = {The initial characterization and clustering of biological samples is a critical step in the analysis of any transcriptomics study. In many studies, principal component analysis (PCA) is the clustering algorithm of choice to predict the relationship of samples or cells based solely on differential gene expression. In addition to the pure quality evaluation of the data, a PCA can also provide initial insights into the biological background of an experiment and help researchers to interpret the data and design the subsequent computational steps accordingly. However, to avoid misleading clusterings and interpretations, an appropriate selection of the underlying gene sets to build the PCA and the choice of the most fitting principal components for the visualization are crucial parts. Here, we present PCAGO, an easy-to-use and interactive tool to analyze gene quantification data derived from RNA sequencing experiments with PCA. The tool includes features such as read-count normalization, filtering of read counts by gene annotation, and various visualization options. In addition, PCAGO helps to select appropriate parameters such as the number of genes and principal components to create meaningful visualizations.},
  keywords = {annotation, differential expression analysis, RNA / transcriptomics, software},
  pubstate = {published},
  tppubtype = {article}
}
2017
Baumgart, Mario; Barth, Emanuel; Savino, Aurora; Groth, Marco; Koch, Philipp; Petzold, Andreas; Arisi, Ivan; Platzer, Matthias; Marz, Manja; Cellerino, Alessandro
A miRNA catalogue and ncRNA annotation of the short-living fish Nothobranchius furzeri Journal Article
In: BMC Genomics, vol. 18, pp. 693, 2017.
Abstract | Links | BibTeX | Tags: aging, annotation, ncRNAs, RNA / transcriptomics
@article{Baumgart:17,
  title = {A {miRNA} catalogue and {ncRNA} annotation of the short-living fish \textit{Nothobranchius furzeri}},
  author = {Mario Baumgart and Emanuel Barth and Aurora Savino and Marco Groth and Philipp Koch and Andreas Petzold and Ivan Arisi and Matthias Platzer and Manja Marz and Alessandro Cellerino},
  doi = {10.1186/s12864-017-3951-8},
  year = {2017},
  date = {2017-09-05},
  urldate = {2017-01-01},
  journal = {BMC Genomics},
  volume = {18},
  pages = {693},
  abstract = {The short-lived fish Nothobranchius furzeri is the shortest-lived vertebrate that can be cultured in captivity and was recently established as a model organism for aging research. Small non-coding RNAs, especially miRNAs, are implicated in age dependent control of gene expression. Here, we present a comprehensive catalogue of miRNAs and several other non-coding RNA classes (ncRNAs) for Nothobranchius furzeri. Analyzing multiple small RNA-Seq libraries, we show most of these identified miRNAs are expressed in at least one of seven Nothobranchius species. Additionally, duplication and clustering of N. furzeri miRNAs was analyzed and compared to the four fish species Danio rerio, Oryzias latipes, Gasterosteus aculeatus and Takifugu rubripes. A peculiar characteristic of N. furzeri, as compared to other teleosts, was a duplication of the miR-29 cluster. The completeness of the catalogue we provide is comparable to that of the zebrafish. This catalogue represents a basis to investigate the role of miRNAs in aging and development in this species.},
  keywords = {aging, annotation, ncRNAs, RNA / transcriptomics},
  pubstate = {published},
  tppubtype = {article}
}
2016
Marschall, Tobias; Marz, Manja; Abeel, Thomas; Dijkstra, Louis; Dutilh, Bas E.; Ghaffaari, Ali; Kersey, Paul; Kloosterman, Wigard P.; Mäkinen, Veli; Novak, Adam M.; Paten, Benedict; Porubsky, David; Rivals, Eric; Alkan, Can; Baaijens, Jasmijn A.; Bakker, Paul I. W. De; Boeva, Valentina; Bonnal, Raoul J. P.; Chiaromonte, Francesca; Chikhi, Rayan; Ciccarelli, Francesca D.; Cijvat, Robin; Datema, Erwin; Duijn, Cornelia M. Van; Eichler, Evan E.; Ernst, Corinna; Eskin, Eleazar; Garrison, Erik; El-Kebir, Mohammed; Klau, Gunnar W.; Korbel, Jan O.; Lameijer, Eric-Wubbo; Langmead, Benjamin; Martin, Marcel; Medvedev, Paul; Mu, John C.; Neerincx, Pieter; Ouwens, Klaasjan; Peterlongo, Pierre; Pisanti, Nadia; Rahmann, Sven; Raphael, Ben; Reinert, Knut; Ridder, Dick; Ridder, Jeroen; Schlesner, Matthias; Schulz-Trieglaff, Ole; Sanders, Ashley D.; Sheikhizadeh, Siavash; Shneider, Carl; Smit, Sandra; Valenzuela, Daniel; Wang, Jiayin; Wessels, Lodewyk; Zhang, Ying; Guryev, Victor; Vandin, Fabio; Ye, Kai; Schönhuth, Alexander
Computational pan-genomics: status, promises and challenges Journal Article
In: Briefings Bioinf, vol. 19, pp. 118–135, 2016.
Abstract | Links | BibTeX | Tags: annotation, DNA / genomics
@article{Consortium:18,
  author    = {Tobias Marschall and Manja Marz and Thomas Abeel and Louis Dijkstra and Bas E. Dutilh and Ali Ghaffaari and Paul Kersey and Wigard P. Kloosterman and Veli Mäkinen and Adam M. Novak and Benedict Paten and David Porubsky and Eric Rivals and Can Alkan and Jasmijn A. Baaijens and de Bakker, Paul I. W. and Valentina Boeva and Raoul J. P. Bonnal and Francesca Chiaromonte and Rayan Chikhi and Francesca D. Ciccarelli and Robin Cijvat and Erwin Datema and van Duijn, Cornelia M. and Evan E. Eichler and Corinna Ernst and Eleazar Eskin and Erik Garrison and Mohammed El-Kebir and Gunnar W. Klau and Jan O. Korbel and Eric-Wubbo Lameijer and Benjamin Langmead and Marcel Martin and Paul Medvedev and John C. Mu and Pieter Neerincx and Klaasjan Ouwens and Pierre Peterlongo and Nadia Pisanti and Sven Rahmann and Ben Raphael and Knut Reinert and Dick Ridder and Jeroen Ridder and Matthias Schlesner and Ole Schulz-Trieglaff and Ashley D. Sanders and Siavash Sheikhizadeh and Carl Shneider and Sandra Smit and Daniel Valenzuela and Jiayin Wang and Lodewyk Wessels and Ying Zhang and Victor Guryev and Fabio Vandin and Kai Ye and Alexander Schönhuth},
  title     = {Computational pan-genomics: status, promises and challenges},
  journal   = {Briefings Bioinf},
  volume    = {19},
  pages     = {118--135},
  year      = {2016},
  date      = {2016-10-21},
  doi       = {10.1093/bib/bbw089},
  urldate   = {2016-10-21},
  keywords  = {annotation, DNA / genomics},
  pubstate  = {published},
  tppubtype = {article},
  abstract  = {Many disciplines, from human genetics and oncology to plant breeding, microbiology and virology, commonly face the challenge of analyzing rapidly increasing numbers of genomes. In case of Homo sapiens, the number of sequenced genomes will approach hundreds of thousands in the next few years. Simply scaling up established bioinformatics pipelines will not be sufficient for leveraging the full potential of such rich genomic data sets. Instead, novel, qualitatively different computational methods and paradigms are needed. We will witness the rapid extension of computational pan-genomics, a new sub-area of research in computational biology. In this article, we generalize existing definitions and understand a pan-genome as any collection of genomic sequences to be analyzed jointly or to be used as a reference. We examine already available approaches to construct and use pan-genomes, discuss the potential benefits of future technologies and methodologies and review open challenges from the vantage point of the above-mentioned biological disciplines. As a prominent example for a computational paradigm shift, we particularly highlight the transition from the representation of reference genomes as strings to representations as graphs. We outline how this and other challenges from different application domains translate into common computational problems, point out relevant bioinformatics techniques and identify open problems in computer science. With this review, we aim to increase awareness that a joint approach to computational pan-genomics can help address many of the problems currently faced in various domains.}
}
2015
Möbius, Petra; Hölzer, Martin; Felder, Marius; Nordsiek, Gabriele; Groth, Marco; Köhler, Heike; Reichwald, Kathrin; Platzer, Matthias; Marz, Manja
Comprehensive insights in the Mycobacterium avium subsp. paratuberculosis genome using new WGS data of sheep strain JIII-386 from Germany Journal Article
In: Genome Biol Evol, vol. 7, no. 9, pp. 2585–2601, 2015.
Abstract | Links | BibTeX | Tags: annotation, assembly, bacteria, DNA / genomics
@article{Moebius:15,
  author    = {Petra Möbius and Martin Hölzer and Marius Felder and Gabriele Nordsiek and Marco Groth and Heike Köhler and Kathrin Reichwald and Matthias Platzer and Manja Marz},
  title     = {Comprehensive insights in the \textit{Mycobacterium avium} subsp. \textit{paratuberculosis} genome using new WGS data of sheep strain JIII-386 from Germany},
  journal   = {Genome Biol Evol},
  volume    = {7},
  number    = {9},
  pages     = {2585--2601},
  year      = {2015},
  date      = {2015-09-17},
  doi       = {10.1093/gbe/evv154},
  urldate   = {2015-09-17},
  keywords  = {annotation, assembly, bacteria, DNA / genomics},
  pubstate  = {published},
  tppubtype = {article},
  abstract  = {Mycobacterium avium (M. a.) subsp. paratuberculosis (MAP) - the etiologic agent of Johne's disease - affects cattle, sheep and other ruminants worldwide. To decipher phenotypic differences among sheep and cattle strains (belonging to MAP-S [Type-I/III] respectively MAP-C [Type-II]) comparative genome analysis needs data from diverse isolates originating from different geographic regions of the world. The current study presents the so far best assembled genome of a MAP-S-strain: sheep isolate JIII-386 from Germany. One newly sequenced cattle isolate (JII-1961, Germany), four published MAP strains of MAP-C and MAP-S from U.S. and Australia and M. a. subsp. hominissuis (MAH) strain 104 were used for assembly improvement and comparisons. All genomes were annotated by BacProt and results compared with NCBI annotation. Corresponding protein-coding sequences (CDSs) were detected, but also CDSs that were exclusively determined either by NCBI or BacProt. A new Shine-Dalgarno sequence motif (5'AGCTGG3') was extracted. Novel CDSs including PE-PGRS family protein genes and about 80 non-coding RNAs exhibiting high sequence conservation are presented. Previously found genetic differences between MAP-types are partially revised. Four out of ten assumed MAP-S-specific large sequence polymorphism regions (LSP s) are still present in MAP-C strains; new LSP s were identified. Independently of the regional origin of the strains, the number of individual CDSs and single nucleotide variants confirm the strong similarity of MAP-C strains and show higher diversity among MAP-S strains. This study gives ambiguous results regarding the hypothesis that MAP-S is the evolutionary intermediate between MAH and MAP-C, but it clearly shows a higher similarity of MAP to MAH than to M. intracellulare.}
}
Sahyoun, Abdullah H; Hölzer, Martin; Jühling, Frank; Höner zu Siederdissen, Christian; Al-Arab, Marwa; Tout, Kifah; Marz, Manja; Middendorf, Martin; Stadler, Peter F; Bernt, Matthias
Towards a comprehensive picture of alloacceptor tRNA remolding in metazoan mitochondrial genomes Journal Article
In: Nucleic Acids Res, vol. 43, no. 16, pp. 8044–8056, 2015.
Abstract | Links | BibTeX | Tags: annotation, evolution, RNA / transcriptomics
@article{Sahyoun:15,
  author    = {Abdullah H Sahyoun and Martin Hölzer and Frank Jühling and Christian {Höner zu Siederdissen} and Marwa Al-Arab and Kifah Tout and Manja Marz and Martin Middendorf and Peter F Stadler and Matthias Bernt},
  title     = {Towards a comprehensive picture of alloacceptor tRNA remolding in metazoan mitochondrial genomes},
  journal   = {Nucleic Acids Res},
  volume    = {43},
  number    = {16},
  pages     = {8044--8056},
  year      = {2015},
  date      = {2015-07-30},
  doi       = {10.1093/nar/gkv746},
  urldate   = {2015-07-30},
  keywords  = {annotation, evolution, RNA / transcriptomics},
  pubstate  = {published},
  tppubtype = {article},
  abstract  = {Remolding of tRNAs is a well-documented process in mitochondrial genomes that changes the identity of a tRNA. It involves a duplication of a tRNA gene, a mutation that changes the anticodon and the loss of the ancestral tRNA gene. The net effect is a functional tRNA that is more closely related to tRNAs of a different alloacceptor family than to tRNAs with the same anticodon in related species. Beyond being of interest for understanding mitochondrial tRNA function and evolution, tRNA remolding events can lead to artifacts in the annotation of mitogenomes and thus in studies of mitogenomic evolution. Therefore, it is important to identify and catalog these events. Here we describe novel methods to detect tRNA remolding in large-scale data sets and apply them to survey tRNA remolding throughout animal evolution. We identify several novel remolding events in addition to the ones previously mentioned in the literature. A detailed analysis of these remoldings showed that many of them are derived from ancestral events.}
}
Linde, Jörg; Duggan, Seána; Weber, Michael; Horn, Fabian; Sieber, Patricia; Hellwig, Daniela; Riege, Konstantin; Marz, Manja; Martin, Ronny; Guthke, Reinhard; Kurzai, Oliver
Defining the transcriptomic landscape of Candida glabrata by RNA-Seq Journal Article
In: Nucleic Acids Res, vol. 43, no. 3, pp. 1392–1406, 2015.
Abstract | Links | BibTeX | Tags: annotation, fungi, RNA / transcriptomics, splicing
@article{Linde:15,
  author    = {Jörg Linde and Seána Duggan and Michael Weber and Fabian Horn and Patricia Sieber and Daniela Hellwig and Konstantin Riege and Manja Marz and Ronny Martin and Reinhard Guthke and Oliver Kurzai},
  title     = {Defining the transcriptomic landscape of \textit{Candida glabrata} by RNA-Seq},
  journal   = {Nucleic Acids Res},
  volume    = {43},
  number    = {3},
  pages     = {1392--1406},
  year      = {2015},
  date      = {2015-01-13},
  doi       = {10.1093/nar/gku1357},
  urldate   = {2015-01-13},
  keywords  = {annotation, fungi, RNA / transcriptomics, splicing},
  pubstate  = {published},
  tppubtype = {article},
  abstract  = {Candida glabrata is the second most common pathogenic Candida species and has emerged as a leading cause of nosocomial fungal infections. Its reduced susceptibility to antifungal drugs and its close relationship to Saccharomyces cerevisiae make it an interesting research focus. Although its genome sequence was published in 2004, little is known about its transcriptional dynamics. Here, we provide a detailed RNA-Seq-based analysis of the transcriptomic landscape of C. glabrata in nutrient-rich media, as well as under nitrosative stress and during pH shift. Using RNA-Seq data together with state-of-the-art gene prediction tools, we refined the annotation of the C. glabrata genome and predicted 49 novel protein-coding genes. Of these novel genes, 14 have homologs in S. cerevisiae and six are shared with other Candida species. We experimentally validated four novel protein-coding genes of which two are differentially regulated during pH shift and interaction with human neutrophils, indicating a potential role in host-pathogen interaction. Furthermore, we identified 58 novel non-protein-coding genes, 38 new introns and condition-specific alternative splicing. Finally, our data suggest different patterns of adaptation to pH shift and nitrosative stress in C. glabrata, Candida albicans and S. cerevisiae and thus further underline a distinct evolution of virulence in yeast.}
}
2014
Bauer, Eugen; Salem, Hassan; Marz, Manja; Vogel, Heiko; Kaltenpoth, Martin
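Transcriptomic immune response of the cotton stainer Dysdercus fasciatus to experimental elimination of vitamin-supplementing intestinal symbionts Journal Article
In: PLoS One, vol. 9, pp. e114865, 2014.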
Abstract | Links | BibTeX | Tags: annotation, assembly, bacteria, differential expression analysis, insects, RNA / transcriptomics
@article{Bauer:14,
  author    = {Eugen Bauer and Hassan Salem and Manja Marz and Heiko Vogel and Martin Kaltenpoth},
  title     = {Transcriptomic immune response of the cotton stainer \textit{Dysdercus fasciatus} to experimental elimination of vitamin-supplementing intestinal symbionts},
  journal   = {PLoS One},
  volume    = {9},
  pages     = {e114865},
  year      = {2014},
  date      = {2014-12-09},
  doi       = {10.1371/journal.pone.0114865},
  url       = {http://www.ebi.ac.uk/ena/data/view/PRJEB6171},
  urldate   = {2014-12-09},
  keywords  = {annotation, assembly, bacteria, differential expression analysis, insects, RNA / transcriptomics},
  pubstate  = {published},
  tppubtype = {article},
  abstract  = {The acquisition and vertical transmission of bacterial symbionts plays an important role in insect evolution and ecology. However, the molecular mechanisms underlying the stable maintenance and control of mutualistic bacteria remain poorly understood. The cotton stainer Dysdercus fasciatus harbours the actinobacterial symbionts Coriobacterium glomerans and Gordonibacter sp. in its midgut. The symbionts supplement limiting B vitamins and thereby significantly contribute to the host's fitness. In this study, we experimentally disrupted the symbionts' vertical transmission route and performed comparative transcriptomic analyses of genes expressed in the gut of aposymbiotic (symbiont-free) and control individuals to study the host immune response in presence and absence of the mutualists. Annotation of assembled cDNA reads identified a considerable number of genes involved in the innate immune system, including different protein isoforms of several immune effector proteins (specifically i-type lysozyme, defensin, hemiptericin, and pyrrhocoricin), suggesting the possibility for a highly differentiated response towards the complex resident microbial community. Gene expression analyses revealed a constitutive expression of transcripts involved in signal transduction of the main insect immune pathways, but differential expression of certain antimicrobial peptide genes. Specifically, qPCRs confirmed the significant down-regulation of c-type lysozyme and up-regulation of hemiptericin in aposymbiotic individuals. The high expression of c-type lysozyme in symbiont-containing bugs may serve to lyse symbiont cells and thereby harvest B-vitamins that are necessary for subsistence on the deficient diet of Malvales seeds. 
Our findings suggest a sophisticated host response to perturbation of the symbiotic gut microbiota, indicating that the innate immune system not only plays an important role in combating pathogens, but also serves as a communication interface between host and symbionts.}
}
Lechner, Marcus; Nickel, Astrid I.; Wehner, Stefanie; Riege, Konstantin; Wieseke, Nicolas; Beckmann, Benedikt M.; Hartmann, Roland K.; Marz, Manja
Genomewide comparison and novel ncRNAs of Aquificales Journal Article
In: BMC Genomics, vol. 15, pp. 522, 2014.
Abstract | Links | BibTeX | Tags: alignment, annotation, assembly, bacteria, classification, ncRNAs, phylogenetics
@article{Lechner:14,
  author    = {Marcus Lechner and Astrid I. Nickel and Stefanie Wehner and Konstantin Riege and Nicolas Wieseke and Benedikt M. Beckmann and Roland K. Hartmann and Manja Marz},
  title     = {Genomewide comparison and novel ncRNAs of Aquificales},
  journal   = {BMC Genomics},
  volume    = {15},
  pages     = {522},
  year      = {2014},
  date      = {2014-06-25},
  doi       = {10.1186/1471-2164-15-522},
  urldate   = {2014-06-25},
  keywords  = {alignment, annotation, assembly, bacteria, classification, ncRNAs, phylogenetics},
  pubstate  = {published},
  tppubtype = {article},
  abstract  = {The Aquificales are a diverse group of thermophilic bacteria that thrive in terrestrial and marine hydrothermal environments. They can be divided into the families Aquificaceae, Desulfurobacteriaceae and Hydrogenothermaceae. Although eleven fully sequenced and assembled genomes are available, only little is known about this taxonomic order in terms of RNA metabolism. In this work, we compare the available genomes, extend their protein annotation, identify regulatory sequences, annotate non-coding RNAs (ncRNAs) of known function, predict novel ncRNA candidates, show idiosyncrasies of the genetic decoding machinery, present two different types of transfer-messenger RNAs and variations of the CRISPR systems. Furthermore, we performed a phylogenetic analysis of the Aquificales based on entire genome sequences, and extended this by a classification among all bacteria using 16S rRNA sequences and a set of orthologous proteins. Combining several in silico features (e.g. conserved and stable secondary structures, GC-content, comparison based on multiple genome alignments) with an in vivo dRNA-seq transcriptome analysis of Aquifex aeolicus, we predict roughly 100 novel ncRNA candidates in this bacterium. We have here re-analyzed the Aquificales, a group of bacteria thriving in extreme environments, sharing the feature of a small, compact genome with a reduced number of protein and ncRNA genes. We present several classical ncRNAs and riboswitch candidates. By combining in silico analysis with dRNA-seq data of A. aeolicus we predict nearly 100 novel ncRNA candidates.}
}
2010
Dalloul, Rami A.; Long, Julie A.; Zimin, Aleksey V.; Aslam, Luqman; Beal, Kathryn; Blomberg, Le Ann; Bouffard, Pascal; Burt, David W.; Crasta, Oswald; Crooijmans, Richard P. M. A.; Cooper, Kristal; Coulombe, Roger A.; De, Supriyo; Delany, Mary E.; Dodgson, Jerry B.; Dong, Jennifer J.; Evans, Clive; Frederickson, Karin M.; Flicek, Paul; Florea, Liliana; Folkerts, Otto; Groenen, Martien A. M.; Harkins, Tim T.; Herrero, Javier; Hoffmann, Steve; Megens, Hendrik-Jan; Jiang, Andrew; Jong, Pieter; Kaiser, Pete; Kim, Heebal; Kim, Kyu-Won; Kim, Sungwon; Langenberger, David; Lee, Mi-Kyung; Lee, Taeheon; Mane, Shrinivasrao; Marcais, Guillaume; Marz, Manja; McElroy, Audrey P.; Modise, Thero; Nefedov, Mikhail; Notredame, Cédric; Paton, Ian R.; Payne, William S.; Pertea, Geo; Prickett, Dennis; Puiu, Daniela; Qioa, Dan; Raineri, Emanuele; Ruffier, Magali; Salzberg, Steven L.; Schatz, Michael C.; Scheuring, Chantel; Schmidt, Carl J.; Schroeder, Steven; Searle, Stephen M. J.; Smith, Edward J.; Smith, Jacqueline; Sonstegard, Tad S.; Stadler, Peter F.; Tafer, Hakim; Tu, Zhijian Jake; Van Tassell, Curtis P.; Vilella, Albert J.; Williams, Kelly P.; Yorke, James A.; Zhang, Liqing; Zhang, Hong-Bin; Zhang, Xiaojun; Zhang, Yang; Reed, Kent M.
Multi-platform next-generation sequencing of the domestic turkey (Meleagris gallopavo): genome assembly and analysis Journal Article
In: PLoS Biol, vol. 8, 2010.
Abstract | Links | BibTeX | Tags: alignment, annotation, assembly, DNA / genomics, ncRNAs
@article{Dalloul:10,
  author    = {Rami A. Dalloul and Julie A. Long and Aleksey V. Zimin and Luqman Aslam and Kathryn Beal and Le Ann Blomberg and Pascal Bouffard and David W. Burt and Oswald Crasta and Richard P. M. A. Crooijmans and Kristal Cooper and Roger A. Coulombe and Supriyo De and Mary E. Delany and Jerry B. Dodgson and Jennifer J. Dong and Clive Evans and Karin M. Frederickson and Paul Flicek and Liliana Florea and Otto Folkerts and Martien A. M. Groenen and Tim T. Harkins and Javier Herrero and Steve Hoffmann and Hendrik-Jan Megens and Andrew Jiang and Pieter Jong and Pete Kaiser and Heebal Kim and Kyu-Won Kim and Sungwon Kim and David Langenberger and Mi-Kyung Lee and Taeheon Lee and Shrinivasrao Mane and Guillaume Marcais and Manja Marz and Audrey P. McElroy and Thero Modise and Mikhail Nefedov and Cédric Notredame and Ian R. Paton and William S. Payne and Geo Pertea and Dennis Prickett and Daniela Puiu and Dan Qioa and Emanuele Raineri and Magali Ruffier and Steven L. Salzberg and Michael C. Schatz and Chantel Scheuring and Carl J. Schmidt and Steven Schroeder and Stephen M. J. Searle and Edward J. Smith and Jacqueline Smith and Tad S. Sonstegard and Peter F. Stadler and Hakim Tafer and Zhijian Jake Tu and Van Tassell, Curtis P. and Albert J. Vilella and Kelly P. Williams and James A. Yorke and Liqing Zhang and Hong-Bin Zhang and Xiaojun Zhang and Yang Zhang and Kent M. Reed},
  title     = {Multi-platform next-generation sequencing of the domestic turkey (\textit{Meleagris gallopavo}): genome assembly and analysis},
  journal   = {PLoS Biol},
  volume    = {8},
  year      = {2010},
  date      = {2010-09-07},
  doi       = {10.1371/journal.pbio.1000475},
  urldate   = {2010-09-07},
  keywords  = {alignment, annotation, assembly, DNA / genomics, ncRNAs},
  pubstate  = {published},
  tppubtype = {article},
  abstract  = {A synergistic combination of two next-generation sequencing platforms with a detailed comparative BAC physical contig map provided a cost-effective assembly of the genome sequence of the domestic turkey (Meleagris gallopavo). Heterozygosity of the sequenced source genome allowed discovery of more than 600,000 high quality single nucleotide variants. Despite this heterozygosity, the current genome assembly (∼1.1 Gb) includes 917 Mb of sequence assigned to specific turkey chromosomes. Annotation identified nearly 16,000 genes, with 15,093 recognized as protein coding and 611 as non-coding RNA genes. Comparative analysis of the turkey, chicken, and zebra finch genomes, and comparing avian to mammalian species, supports the characteristic stability of avian genomes and identifies genes unique to the avian lineage. Clear differences are seen in number and variety of genes of the avian immune system where expansions and novel genes are less frequent than examples of gene loss. The turkey genome sequence provides resources to further understand the evolution of vertebrate genomes and genetic variation underlying economically important quantitative traits in poultry. This integrated approach may be a model for providing both gene and chromosome level assemblies of other species with agricultural, ecological, and evolutionary interest.}
}
Yusuf, Dilmurat; Marz, Manja; Stadler, Peter F; Hofacker, Ivo L
Bcheck: a wrapper tool for detecting RNase P RNA genes Journal Article
In: BMC Genomics, vol. 11, pp. 432, 2010.
Abstract | Links | BibTeX | Tags: annotation, bacteria, classification, fungi, ncRNAs, RNA / transcriptomics, software
@article{Yusuf:10,
  author    = {Dilmurat Yusuf and Manja Marz and Peter F Stadler and Ivo L Hofacker},
  title     = {Bcheck: a wrapper tool for detecting RNase P RNA genes},
  journal   = {BMC Genomics},
  volume    = {11},
  pages     = {432},
  year      = {2010},
  date      = {2010-07-13},
  doi       = {10.1186/1471-2164-11-432},
  url       = {http://rna.tbi.univie.ac.at/bcheck},
  urldate   = {2010-07-13},
  keywords  = {annotation, bacteria, classification, fungi, ncRNAs, RNA / transcriptomics, software},
  pubstate  = {published},
  tppubtype = {article},
  abstract  = {Effective bioinformatics solutions are needed to tackle challenges posed by industrial-scale genome annotation. We present Bcheck, a wrapper tool which predicts RNase P RNA genes by combining the speed of pattern matching and sensitivity of covariance models. The core of Bcheck is a library of subfamily specific descriptor models and covariance models. Scanning all microbial genomes in GenBank identifies RNase P RNA genes in 98% of 1024 microbial chromosomal sequences within just 4 hours on single CPU. Comparing to existing annotations found in 387 of the GenBank files, Bcheck predictions have more intact structure and are automatically classified by subfamily membership. For eukaryotic chromosomes Bcheck could identify the known RNase P RNA genes in 84 out of 85 metazoan genomes and 19 out of 21 fungi genomes. Bcheck predicted 37 novel eukaryotic RNase P RNA genes, 32 of which are from fungi. Gene duplication events are observed in at least 20 metazoan organisms. Scanning of meta-genomic data from the Global Ocean Sampling Expedition, comprising over 10 million sample sequences (18 Gigabases), predicted 2909 unique genes, 98% of which fall into ancestral bacteria A type of RNase P RNA and 66% of which have no close homolog to known prokaryotic RNase P RNA. The combination of efficient filtering by means of a descriptor-based search and subsequent construction of a high-quality gene model by means of a covariance model provides an efficient method for the detection of RNase P RNA genes in large-scale sequencing data. Bcheck is implemented as webserver and can also be downloaded for local use from http://rna.tbi.univie.ac.at/bcheck.}
}
2009
Copeland, Claudia S.; Marz, Manja; Rose, Dominic; Hertel, Jana; Brindley, Paul J.; Santana, Clara Bermudez; Kehr, Stephanie; Attolini, Camille Stephan-Otto; Stadler, Peter F.
Homology-based annotation of non-coding RNAs in the genomes of Schistosoma mansoni and Schistosoma japonicum Journal Article
In: BMC Genomics, vol. 10, pp. 464, 2009.
Abstract | Links | BibTeX | Tags: annotation, ncRNAs, RNA structure
@article{Copeland:09,
  author    = {Claudia S. Copeland and Manja Marz and Dominic Rose and Jana Hertel and Paul J. Brindley and Clara Bermudez Santana and Stephanie Kehr and Camille Stephan-Otto Attolini and Peter F. Stadler},
  title     = {Homology-based annotation of non-coding RNAs in the genomes of \textit{Schistosoma mansoni} and \textit{Schistosoma japonicum}},
  journal   = {BMC Genomics},
  volume    = {10},
  pages     = {464},
  year      = {2009},
  date      = {2009-10-08},
  doi       = {10.1186/1471-2164-10-464},
  urldate   = {2009-10-08},
  keywords  = {annotation, ncRNAs, RNA structure},
  pubstate  = {published},
  tppubtype = {article},
  abstract  = {Schistosomes are trematode parasites of the phylum Platyhelminthes. They are considered the most important of the human helminth parasites in terms of morbidity and mortality. Draft genome sequences are now available for Schistosoma mansoni and Schistosoma japonicum. Non-coding RNA (ncRNA) plays a crucial role in gene expression regulation, cellular function and defense, homeostasis, and pathogenesis. The genome-wide annotation of ncRNAs is a non-trivial task unless well-annotated genomes of closely related species are already available. A homology search for structured ncRNA in the genome of S. mansoni resulted in 23 types of ncRNAs with conserved primary and secondary structure. Among these, we identified rRNA, snRNA, SL RNA, SRP, tRNAs and RNase P, and also possibly MRP and 7SK RNAs. In addition, we confirmed five miRNAs that have recently been reported in S. japonicum and found two additional homologs of known miRNAs. The tRNA complement of S. mansoni is comparable to that of the free-living planarian Schmidtea mediterranea, although for some amino acids differences of more than a factor of two are observed: Leu, Ser, and His are overrepresented, while Cys, Meth, and Ile are underrepresented in S. mansoni. On the other hand, the number of tRNAs in the genome of S. japonicum is reduced by more than a factor of four. Both schistosomes have a complete set of minor spliceosomal snRNAs. Several ncRNAs that are expected to exist in the S. mansoni genome were not found, among them the telomerase RNA, vault RNAs, and Y RNAs. The ncRNA sequences and structures presented here represent the most complete dataset of ncRNA from any lophotrochozoan reported so far. This data set provides an important reference for further analysis of the genomes of schistosomes and indeed eukaryotic genomes at large.}
}
Hertel, Jana; Jong, Danielle; Marz, Manja; Rose, Dominic; Tafer, Hakim; Tanzer, Andrea; Schierwater, Bernd; Stadler, Peter F
Non-coding RNA annotation of the genome of Trichoplax adhaerens Journal Article
In: Nucleic Acids Res, vol. 37, no. 5, pp. 1602–1615, 2009.
Abstract | Links | BibTeX | Tags: annotation, ncRNAs
@article{Hertel:09,
  author    = {Jana Hertel and Danielle Jong and Manja Marz and Dominic Rose and Hakim Tafer and Andrea Tanzer and Bernd Schierwater and Peter F Stadler},
  title     = {Non-coding RNA annotation of the genome of \textit{Trichoplax adhaerens}},
  journal   = {Nucleic Acids Res},
  volume    = {37},
  number    = {5},
  pages     = {1602--1615},
  year      = {2009},
  date      = {2009-01-26},
  doi       = {10.1093/nar/gkn1084},
  urldate   = {2009-01-01},
  keywords  = {annotation, ncRNAs},
  pubstate  = {published},
  tppubtype = {article},
  abstract  = {A detailed annotation of non-protein coding RNAs is typically missing in initial releases of newly sequenced genomes. Here we report on a comprehensive ncRNA annotation of the genome of Trichoplax adhaerens, the presumably most basal metazoan whose genome has been published to-date. Since blast identified only a small fraction of the best-conserved ncRNAs--in particular rRNAs, tRNAs and some snRNAs--we developed a semi-global dynamic programming tool, GotohScan, to increase the sensitivity of the homology search. It successfully identified the full complement of major and minor spliceosomal snRNAs, the genes for RNase P and MRP RNAs, the SRP RNA, as well as several small nucleolar RNAs. We did not find any microRNA candidates homologous to known eumetazoan sequences. Interestingly, most ncRNAs, including the pol-III transcripts, appear as single-copy genes or with very small copy numbers in the Trichoplax genome.}
}
2007
Washietl, Stefan; Pedersen, Jakob S; Korbel, Jan O; Stocsits, Claudia; Gruber, Andreas R; Hackermüller, Jörg; Hertel, Jana; Lindemeyer, Manja; Reiche, Kristin; Tanzer, Andrea; Ucla, Catherine; Wyss, Carine; Antonarakis, Stylianos E; Denoeud, France; Lagarde, Julien; Drenkow, Jorg; Kapranov, Philipp; Gingeras, Thomas R; Guigó, Roderic; Snyder, Michael; Gerstein, Mark B; Reymond, Alexandre; Hofacker, Ivo L; Stadler, Peter F
Structured RNAs in the ENCODE selected regions of the human genome Journal Article
In: Genome Res, vol. 17, pp. 852–864, 2007.
Abstract | Links | BibTeX | Tags: annotation, evolution, ncRNAs, RNA structure
@article{Washietl:07,
  author    = {Stefan Washietl and Jakob S Pedersen and Jan O Korbel and Claudia Stocsits and Andreas R Gruber and Jörg Hackermüller and Jana Hertel and Manja Lindemeyer and Kristin Reiche and Andrea Tanzer and Catherine Ucla and Carine Wyss and Stylianos E Antonarakis and France Denoeud and Julien Lagarde and Jorg Drenkow and Philipp Kapranov and Thomas R Gingeras and Roderic Guigó and Michael Snyder and Mark B Gerstein and Alexandre Reymond and Ivo L Hofacker and Peter F Stadler},
  title     = {Structured RNAs in the ENCODE selected regions of the human genome},
  journal   = {Genome Res},
  volume    = {17},
  pages     = {852--864},
  year      = {2007},
  date      = {2007-01-01},
  doi       = {10.1101/gr.5650707},
  url       = {https://www.tbi.univie.ac.at/papers/SUPPLEMENTS/ENCODE/},
  urldate   = {2007-01-01},
  keywords  = {annotation, evolution, ncRNAs, RNA structure},
  pubstate  = {published},
  tppubtype = {article},
  abstract  = {Functional RNA structures play an important role both in the context of noncoding RNA transcripts as well as regulatory elements in mRNAs. Here we present a computational study to detect functional RNA structures within the ENCODE regions of the human genome. Since structural RNAs in general lack characteristic signals in primary sequence, comparative approaches evaluating evolutionary conservation of structures are most promising. We have used three recently introduced programs based on either phylogenetic-stochastic context-free grammar (EvoFold) or energy directed folding (RNAz and AlifoldZ), yielding several thousand candidate structures (corresponding to approximately 2.7% of the ENCODE regions). EvoFold has its highest sensitivity in highly conserved and relatively AU-rich regions, while RNAz favors slightly GC-rich regions, resulting in a relatively small overlap between methods. Comparison with the GENCODE annotation points to functional RNAs in all genomic contexts, with a slightly increased density in 3'-UTRs. While we estimate a significant false discovery rate of approximately 50%-70% many of the predictions can be further substantiated by additional criteria: 248 loci are predicted by both RNAz and EvoFold, and an additional 239 RNAz or EvoFold predictions are supported by the (more stringent) AlifoldZ algorithm. Five hundred seventy RNAz structure predictions fall into regions that show signs of selection pressure also on the sequence level (i.e., conserved elements). More than 700 predictions overlap with noncoding transcripts detected by oligonucleotide tiling arrays. One hundred seventy-five selected candidates were tested by RT-PCR in six tissues, and expression could be verified in 43 cases (24.6%).}
}
