2025
Saghaei, Shahram; Siemers, Malte; Ossetek, Kilian L; Richter, Stephan; Edwards, Robert A; Roux, Simon; Zielezinski, Andrzej; Dutilh, Bas E; Marz, Manja; Cassman, Noriko A
VirJenDB: a FAIR (meta)data and bioinformatics platform for all viruses Journal Article
In: Nucleic Acids Research, 2025.
Abstract | Links | BibTeX | Tags: database, viruses
% Journal article record for the VirJenDB paper. The citation key is kept as exported.
% Acronyms in the title are braced so sentence-casing styles keep their capitals;
% the percent sign and tilde in the abstract are written so LaTeX prints them literally.
@article{nokey_97,
  title     = {{VirJenDB}: a {FAIR} (meta)data and bioinformatics platform for all viruses},
  author    = {Saghaei, Shahram and Siemers, Malte and Ossetek, Kilian L and Richter, Stephan and Edwards, Robert A and Roux, Simon and Zielezinski, Andrzej and Dutilh, Bas E and Marz, Manja and Cassman, Noriko A},
  journal   = {Nucleic Acids Research},
  year      = {2025},
  date      = {2025-12-17},
  doi       = {10.1093/nar/gkaf1224},
  abstract  = {High-throughput sequencing has generated an unprecedented volume of data. However, researcher-submitted data in repositories requires extensive curation and quality control for reuse. These tasks are hindered by the multiplicity of repositories, the sheer volume of the data, and the complexity of virus (meta)data curation. To address these challenges, VirJenDB offers a user-friendly platform to facilitate versioned, community-driven curation, and ontology development. Virus sequences were ingested from 16 sources, including $\sim$200 fields of metadata or standards, covering taxonomy, sample, and host information. Up to 85 metadata fields have undergone at least one round of curation, and are linked to 15.4 million virus sequences, with 88 \% from those infecting eukaryotes and the remaining infecting prokaryotes. Subsets were created, including a novel collection of 0.91 million viral operational taxonomic unit (vOTU) sequences across all viruses, while keeping the original sequences from each vOTU to facilitate downstream analyses, e.g. sequence variation. The VirJenDB web portal (https://www.virjendb.org) provides HTTPS and Application Programming Interface (API) access to the sequence datasets and metadata, offering a search engine, filtering, download, visualizations, and documentation. VirJenDB aims to connect the phage and eukaryotic virus research communities by supporting webtool integration, meta-analyses, and metadata schema extensions.},
  keywords  = {database, viruses},
  pubstate  = {published},
  tppubtype = {article}
}
Ontiveros-Palacios, Nancy; Cooke, Emma; Nawrocki, Eric P.; Triebel, Sandra; Marz, Manja; Rivas, Elena; Griffiths-Jones, Sam; Petrov, Anton I.; Bateman, Alex; Sweeney, Blake
Rfam 15: RNA families database in 2025 Journal Article
In: Nucleic Acids Research, 2025.
Abstract | Links | BibTeX | Tags: database, ncRNAs, RNA structure, RNA-RNA interactions
% Journal article record for the Rfam 15 release paper. The citation key is kept as exported.
% Acronyms in the title are braced so sentence-casing styles keep their capitals;
% percent signs in the abstract are escaped and the apostrophe is plain ASCII.
@article{nokey_67,
  title     = {{Rfam} 15: {RNA} families database in 2025},
  author    = {Ontiveros-Palacios, Nancy and Cooke, Emma and Nawrocki, Eric P. and Triebel, Sandra and Marz, Manja and Rivas, Elena and Griffiths-Jones, Sam and Petrov, Anton I. and Bateman, Alex and Sweeney, Blake},
  journal   = {Nucleic Acids Research},
  year      = {2025},
  date      = {2025-01-06},
  urldate   = {2024-11-11},
  doi       = {10.1093/nar/gkae1023},
  abstract  = {The Rfam database, a widely used repository of non-coding RNA families, has undergone significant updates in release 15.0. This paper introduces major improvements, including the expansion of Rfamseq to 26 106 genomes, a 76\% increase, incorporating the latest UniProt reference proteomes and additional viral genomes. Sixty-five RNA families were enhanced using experimentally determined 3D structures, improving the accuracy of consensus secondary structures and annotations. R-scape covariation analysis was used to refine structural predictions in 26 families. Gene Ontology (GO) and Sequence Ontology annotations were comprehensively updated, increasing GO term coverage to 75\% of families. The release adds 14 new Hepatitis C Virus RNA families and completes microRNA family synchronization with miRBase, resulting in 1603 microRNA families. New data types, including FULL alignments, have been implemented. Integration with APICURON for improved curator attribution and multiple website enhancements further improve user experience. These updates significantly expand Rfam's coverage and improve annotation quality, reinforcing its critical role in RNA research, genome annotation and the development of machine learning models. Rfam is freely available at https://rfam.org.},
  keywords  = {database, ncRNAs, RNA structure, RNA-RNA interactions},
  pubstate  = {published},
  tppubtype = {article}
}
2023
Ritsch, Muriel; Cassman, Noriko A.; Saghaei, Shahram; Marz, Manja
Navigating the Landscape: A Comprehensive Review of Current Virus Databases Journal Article
In: Viruses, vol. 15, iss. 9, art. no. 1834, 2023, ISSN: 1999-4915.
Abstract | Links | BibTeX | Tags: database, viruses
% Journal article record for the virus-database review. The citation key is kept as exported.
% The serial identifier 1999-4915 is stored as an ISSN (it was filed under isbn).
% The issue (9) is stored in number and the article number (1834) in pages,
% matching the DOI suffix v15091834 (volume 15, issue 09, article 1834).
@article{nokey_43,
  title     = {Navigating the Landscape: A Comprehensive Review of Current Virus Databases},
  author    = {Ritsch, Muriel and Cassman, Noriko A. and Saghaei, Shahram and Marz, Manja},
  journal   = {Viruses},
  volume    = {15},
  number    = {9},
  pages     = {1834},
  year      = {2023},
  date      = {2023-08-29},
  issn      = {1999-4915},
  doi       = {10.3390/v15091834},
  abstract  = {Viruses are abundant and diverse entities that have important roles in public health, ecology, and agriculture. The identification and surveillance of viruses rely on an understanding of their genome organization, sequences, and replication strategy. Despite technological advancements in sequencing methods, our current understanding of virus diversity remains incomplete, highlighting the need to explore undiscovered viruses. Virus databases play a crucial role in providing access to sequences, annotations and other metadata, and analysis tools for studying viruses. However, there has not been a comprehensive review of virus databases in the last five years. This study aimed to fill this gap by identifying 24 active virus databases and included an extensive evaluation of their content, functionality and compliance with the FAIR principles. In this study, we thoroughly assessed the search capabilities of five database catalogs, which serve as comprehensive repositories housing a diverse array of databases and offering essential metadata. Moreover, we conducted a comprehensive review of different types of errors, encompassing taxonomy, names, missing information, sequences, sequence orientation, and chimeric sequences, with the intention of empowering users to effectively tackle these challenges. We expect this review to aid users in selecting suitable virus databases and other resources, and to help databases in error management and improve their adherence to the FAIR principles. The databases listed here represent the current knowledge of viruses and will help aid users find databases of interest based on content, functionality, and scope. The use of virus databases is integral to gaining new insights into the biology, evolution, and transmission of viruses, and developing new strategies to manage virus outbreaks and preserve global health.},
  keywords  = {database, viruses},
  pubstate  = {published},
  tppubtype = {article}
}
2020
Kalvari, Ioanna; Nawrocki, Eric P; Ontiveros-Palacios, Nancy; Argasinska, Joanna; Lamkiewicz, Kevin; Marz, Manja; Griffiths-Jones, Sam; Toffano-Nioche, Claire; Gautheret, Daniel; Weinberg, Zasha; Rivas, Elena; Eddy, Sean R; Finn, Robert D; Bateman, Alex; Petrov, Anton I
Rfam 14: expanded coverage of metagenomic, viral and microRNA families Journal Article
In: Nucleic Acids Research, vol. 49, no. D1, pp. D192–D200, 2020.
Abstract | Links | BibTeX | Tags: alignment, annotation, bacteria, coronavirus, database, metagenomics, ncRNAs, RNA / transcriptomics, software, viruses
% Journal article record for the Rfam 14 release paper. The citation key is unchanged.
% The journal name is spelled out in full, as in the other Nucleic Acids Research entries.
% Case-sensitive words in the title are braced so sentence-casing styles do not alter them.
@article{Kalvari:21,
  title     = {{Rfam} 14: expanded coverage of metagenomic, viral and {microRNA} families},
  author    = {Kalvari, Ioanna and Nawrocki, Eric P and Ontiveros-Palacios, Nancy and Argasinska, Joanna and Lamkiewicz, Kevin and Marz, Manja and Griffiths-Jones, Sam and Toffano-Nioche, Claire and Gautheret, Daniel and Weinberg, Zasha and Rivas, Elena and Eddy, Sean R and Finn, Robert D and Bateman, Alex and Petrov, Anton I},
  journal   = {Nucleic Acids Research},
  volume    = {49},
  number    = {D1},
  pages     = {D192--D200},
  publisher = {Oxford University Press (OUP)},
  year      = {2020},
  date      = {2020-11-19},
  doi       = {10.1093/nar/gkaa1047},
  url       = {https://rfam.org/},
  urldate   = {2020-11-19},
  abstract  = {Rfam is a database of RNA families where each of the 3444 families is represented by a multiple sequence alignment of known RNA sequences and a covariance model that can be used to search for additional members of the family. Recent developments have involved expert collaborations to improve the quality and coverage of Rfam data, focusing on microRNAs, viral and bacterial RNAs. We have completed the first phase of synchronising microRNA families in Rfam and miRBase, creating 356 new Rfam families and updating 40. We established a procedure for comprehensive annotation of viral RNA families starting with Flavivirus and Coronaviridae RNAs. We have also increased the coverage of bacterial and metagenome-based RNA families from the ZWD database. These developments have enabled a significant growth of the database, with the addition of 759 new families in Rfam 14. To facilitate further community contribution to Rfam, expert users are now able to build and submit new families using the newly developed Rfam Cloud family curation system. New Rfam website features include a new sequence similarity search powered by RNAcentral, as well as search and visualisation of families with pseudoknots. Rfam is freely available at https://rfam.org.},
  keywords  = {alignment, annotation, bacteria, coronavirus, database, metagenomics, ncRNAs, RNA / transcriptomics, software, viruses},
  pubstate  = {published},
  tppubtype = {article}
}
2011
Bateman, Alex; Agrawal, Shipra; Birney, Ewan; Bruford, Elspeth A.; Bujnicki, Janusz M.; Cochrane, Guy; Cole, James R.; Dinger, Marcel E.; Enright, Anton J.; Gardner, Paul P.; Gautheret, Daniel; Griffiths-Jones, Sam; Harrow, Jen; Herrero, Javier; Holmes, Ian H.; Huang, Hsien-Da; Kelly, Krystyna A.; Kersey, Paul; Kozomara, Ana; Lowe, Todd M.; Marz, Manja; Moxon, Simon; Pruitt, Kim D.; Samuelsson, Tore; Stadler, Peter F.; Vilella, Albert J.; Vogel, Jan-Hinnerk; Williams, Kelly P.; Wright, Mathew W.; Zwieb, Christian
RNAcentral: A vision for an international database of RNA sequences Journal Article
In: RNA, vol. 17, pp. 1941–1946, 2011.
Abstract | Links | BibTeX | Tags: database, ncRNAs, RNA / transcriptomics
% Journal article record for the RNAcentral proposal paper. The citation key is unchanged.
% Case-sensitive words in the title are braced so sentence-casing styles do not alter them.
@article{Bateman:11,
  title     = {{RNAcentral}: A vision for an international database of {RNA} sequences},
  author    = {Bateman, Alex and Agrawal, Shipra and Birney, Ewan and Bruford, Elspeth A. and Bujnicki, Janusz M. and Cochrane, Guy and Cole, James R. and Dinger, Marcel E. and Enright, Anton J. and Gardner, Paul P. and Gautheret, Daniel and Griffiths-Jones, Sam and Harrow, Jen and Herrero, Javier and Holmes, Ian H. and Huang, Hsien-Da and Kelly, Krystyna A. and Kersey, Paul and Kozomara, Ana and Lowe, Todd M. and Marz, Manja and Moxon, Simon and Pruitt, Kim D. and Samuelsson, Tore and Stadler, Peter F. and Vilella, Albert J. and Vogel, Jan-Hinnerk and Williams, Kelly P. and Wright, Mathew W. and Zwieb, Christian},
  journal   = {RNA},
  volume    = {17},
  pages     = {1941--1946},
  year      = {2011},
  date      = {2011-09-22},
  urldate   = {2011-09-22},
  doi       = {10.1261/rna.2750811},
  abstract  = {During the last decade there has been a great increase in the number of noncoding RNA genes identified, including new classes such as microRNAs and piRNAs. There is also a large growth in the amount of experimental characterization of these RNA components. Despite this growth in information, it is still difficult for researchers to access RNA data, because key data resources for noncoding RNAs have not yet been created. The most pressing omission is the lack of a comprehensive RNA sequence database, much like UniProt, which provides a comprehensive set of protein knowledge. In this article we propose the creation of a new open public resource that we term RNAcentral, which will contain a comprehensive collection of RNA sequences and fill an important gap in the provision of biomedical databases. We envision RNA researchers from all over the world joining a federated RNAcentral network, contributing specialized knowledge and databases. RNAcentral would centralize key data that are currently held across a variety of databases, allowing researchers instant access to a single, unified resource. This resource would facilitate the next generation of RNA research and help drive further discoveries, including those that improve food production and human and animal health. We encourage additional RNA database resources and research groups to join this effort. We aim to obtain international network funding to further this endeavor.},
  keywords  = {database, ncRNAs, RNA / transcriptomics},
  pubstate  = {published},
  tppubtype = {article}
}
