2025
Eulenfeld, Tom; Triebel, Sandra; Marz, Manja
AnchoRNA: Full virus genome alignments through conserved anchor regions Journal Article
In: bioRxiv, 2025.
Abstract | Links | BibTeX | Tags: alignment, phylogenetics, software, viruses
@article{nokey_67,
  title     = {{AnchoRNA}: Full virus genome alignments through conserved anchor regions},
  author    = {Tom Eulenfeld and Sandra Triebel and Manja Marz},
  doi       = {10.1101/2025.01.30.635689},
  year      = {2025},
  date      = {2025-12-15},
  urldate   = {2025-02-01},
  journal   = {bioRxiv},
  abstract  = {Multiple sequence alignment of full viral genomes can be challenging due to factors such as long sequences, large insertions/deletions (spanning several 100 nucleotides), large number of sequences, sequence divergence, and high computational complexity in particular when computing alignments based on RNA secondary structures. Standard alignment methods often face these issues, in particular when processing highly variable sequences or when specific phylogenetic analysis is required on selected subsequences.
We present an algorithm to determine high quality anchors that define partitions of sequences and guide the alignment of viral genomes to respect well conserved, and therefore functionally significant, regions. This new approach is implemented in the Python-based command line tool AnchoRNA, which is designed to identify conserved regions, or anchors, within coding sequences. By default, anchors are searched in translated coding sequences accounting for high mutation rates in viral genomes. AnchoRNA enhances the accuracy and efficiency of full-genome alignment by focusing on these crucial conserved regions. AnchoRNA guided alignments are systematically compared to the results of 3 alignment programs. Utilizing a dataset of 55 representative Pestivirus genomes, AnchoRNA identified 55 anchors that are used for guiding the alignment process. The incorporation of these anchors led to improvements across tested alignment tools, highlighting the effectiveness of AnchoRNA in enhancing alignment quality, especially in viral genomes.},
  keywords  = {alignment, phylogenetics, software, viruses},
  pubstate  = {published},
  tppubtype = {article}
}
We present an algorithm to determine high quality anchors that define partitions of sequences and guide the alignment of viral genomes to respect well conserved, and therefore functionally significant, regions. This new approach is implemented in the Python-based command line tool AnchoRNA, which is designed to identify conserved regions, or anchors, within coding sequences. By default, anchors are searched in translated coding sequences accounting for high mutation rates in viral genomes. AnchoRNA enhances the accuracy and efficiency of full-genome alignment by focusing on these crucial conserved regions. AnchoRNA guided alignments are systematically compared to the results of 3 alignment programs. Utilizing a dataset of 55 representative Pestivirus genomes, AnchoRNA identified 55 anchors that are used for guiding the alignment process. The incorporation of these anchors led to improvements across tested alignment tools, highlighting the effectiveness of AnchoRNA in enhancing alignment quality, especially in viral genomes.
Lataretu, Marie; Krautwurst, Sebastian; Huska, Matthew R; Marquet, Mike; Viehweger, Adrian; Braun, Sascha D; Brandt, Christian; Hölzer, Martin
Targeted decontamination of sequencing data with CLEAN Journal Article
In: NAR Genomics and Bioinformatics, vol. 7, 2025.
Abstract | Links | BibTeX | Tags: assembly, metagenomics, nanopore, RNA / transcriptomics, software
@article{nokey_81,
  title     = {Targeted decontamination of sequencing data with {CLEAN}},
  author    = {Marie Lataretu and Sebastian Krautwurst and Matthew R. Huska and Mike Marquet and Adrian Viehweger and Sascha D. Braun and Christian Brandt and Martin Hölzer},
  doi       = {10.1093/nargab/lqaf105},
  year      = {2025},
  date      = {2025-07-04},
  urldate   = {2025-07-04},
  journal   = {NAR Genomics and Bioinformatics},
  volume    = {7},
  abstract  = {Many biological and medical questions are answered based on the analysis of sequence data. However, we can find contamination, artificial spike-ins, and overrepresented rRNA (ribosomal RNA) sequences in various read collections and assemblies. In particular, spike-ins used as controls, as those known from Illumina or Nanopore data, are often not considered as contaminants and also not appropriately removed during analyses. Additionally, removing human host DNA may be necessary for data protection and ethical considerations to ensure that individuals cannot be identified. We developed CLEAN, a pipeline to remove unwanted sequences from both long- and short-read sequencing techniques. While focusing on Illumina and Nanopore data with their technology-specific control sequences, the pipeline can also be used for host decontamination of metagenomic reads and assemblies, or the removal of rRNA from RNA-Seq data. The results are the purified sequences and sequences identified as contaminated with statistics summarized in a report. The output can be used directly in subsequent analyses, resulting in faster computations and improved results. Although decontamination seems mundane, many contaminants are routinely overlooked, cleaned by steps that are not fully reproducible or difficult to trace. CLEAN facilitates reproducible, platform-independent data analysis in genomics and transcriptomics and is freely available at https://github.com/rki-mf1/clean under a BSD3 license.},
  keywords  = {assembly, metagenomics, nanopore, RNA / transcriptomics, software},
  pubstate  = {published},
  tppubtype = {article}
}
Collatz, Maximilian; Braun, Sascha D.; Reinicke, Martin; Müller, Elke; Monecke, Stefan; Ehricht, Ralf
AssayBLAST: A Bioinformatic Tool for In Silico Analysis of Molecular Multiparameter Assays Journal Article
In: Applied Biosciences, vol. 4, 2025.
Abstract | Links | BibTeX | Tags: alignment, DNA / genomics, software
@article{nokey_89,
  title     = {{AssayBLAST}: A Bioinformatic Tool for In Silico Analysis of Molecular Multiparameter Assays},
  author    = {Maximilian Collatz and Sascha D. Braun and Martin Reinicke and Elke Müller and Stefan Monecke and Ralf Ehricht},
  doi       = {10.3390/applbiosci4020018},
  year      = {2025},
  date      = {2025-04-01},
  journal   = {Applied Biosciences},
  volume    = {4},
  abstract  = {Accurate primer and probe design is essential for molecular applications, including PCR, qPCR, and molecular multiparameter assays like microarrays. The novel software tool AssayBLAST addresses this need by simulating interactions between oligonucleotides and target sequences. AssayBLAST handles large sets of primer and probe sequences simultaneously and supports comprehensive assay designs by allowing users to identify off-target binding, calculate melting temperatures, and ensure strand specificity, a critical but often overlooked aspect. AssayBLAST performs two optimized BLAST-based searches for each primer or probe sequence, checking the forward and reverse strands for off-target interactions and strand-specific binding accuracy. The results are compiled into a mapping table containing binding sites, mismatches, and strand orientation, allowing users to validate large sets of oligonucleotides across predefined custom databases for a complete and optimal theoretical assay design. AssayBLAST was evaluated against experimental Staphylococcus aureus microarray data, achieving 97.5\% accuracy in predicting probe–target hybridization outcomes. This high accuracy demonstrates the method’s effectiveness in reliably using BLAST hits and mismatch counts to predict microarray results. AssayBLAST provides a reliable, scalable solution for in silico primer and probe validation, effectively supporting large-scale assay designs and optimizations. Its accurate prediction of hybridization outcomes demonstrates its utility in enhancing the efficiency and reliability of molecular assays.},
  keywords  = {alignment, DNA / genomics, software},
  pubstate  = {published},
  tppubtype = {article}
}
Meyer, Daria; Barth, Emanuel; Wiehle, Laura; Marz, Manja
diffONT: predicting methylation-specific PCR biomarkers based on nanopore sequencing data for clinical application Journal Article
In: bioRxiv, 2025.
Abstract | Links | BibTeX | Tags: cancer, DNA / genomics, nanopore, nucleic acid modifications, software
@article{nokey_73,
  title     = {{diffONT}: predicting methylation-specific {PCR} biomarkers based on nanopore sequencing data for clinical application},
  author    = {Daria Meyer and Emanuel Barth and Laura Wiehle and Manja Marz},
  doi       = {10.1101/2025.02.17.638597},
  year      = {2025},
  date      = {2025-02-20},
  urldate   = {2025-02-20},
  journal   = {bioRxiv},
  abstract  = {DNA methylation is known to act as biomarker applicable for clinical diagnostics, especially in cancer detection. Methylation-specific PCR (MSP) is a widely used approach to screen patient samples fast and efficiently for differential methylation. During MSP, methylated regions are selectively amplified with specific primers. With nanopore sequencing, knowledge about DNA methylation is generated during direct DNA sequencing, without any need for pretreatment of the DNA. Multiple methods, mainly developed for whole-genome bisulfite sequencing (WGBS) data, exist to predict differentially methylated regions (DMRs) in the genome. However, the predicted DMRs are often very large, and not sufficiently discriminating to generate meaningful results in MSP creating a gap between theoretical cancer marker research and practical application, as no tool currently provides methylation difference predictions tailored for PCR-based diagnostics. Here we present diffONT, which predicts differentially methylated primer regions, directly suitable for MSP primer design and thus allowing a direct translation into practical approaches. diffONT takes into account (i) the specific length of primer and amplicon regions, (ii) the fact that one condition should be unmethylated, and (iii) a minimal required amount of differentially methylated cytosines within the primer regions. Based on two nanopore sequencing data sets we compared the results of diffONT to metilene, DSS and pycoMeth. We show that the regions predicted by diffONT are more specific towards hypermethylated regions and more usable for MSP. diffONT accelerates the design of methylation-specific diagnostic assays, bridging the gap between theoretical research and clinical application.},
  keywords  = {cancer, DNA / genomics, nanopore, nucleic acid modifications, software},
  pubstate  = {published},
  tppubtype = {article}
}
2024
Lamkiewicz, Kevin; Barf, Lisa-Marie; Sachse, Konrad; Hölzer, Martin
RIBAP: a comprehensive bacterial core genome annotation pipeline for pangenome calculation beyond the species level Journal Article
In: Genome Biology, vol. 25, iss. 1, 2024.
Abstract | Links | BibTeX | Tags: annotation, bacteria, DNA / genomics, evolution, software
@article{nokey_63,
  title     = {{RIBAP}: a comprehensive bacterial core genome annotation pipeline for pangenome calculation beyond the species level},
  author    = {Kevin Lamkiewicz and Lisa-Marie Barf and Konrad Sachse and Martin Hölzer},
  doi       = {10.1186/s13059-024-03312-9},
  year      = {2024},
  date      = {2024-07-01},
  journal   = {Genome Biology},
  volume    = {25},
  number    = {1},
  abstract  = {Microbial pangenome analysis identifies present or absent genes in prokaryotic genomes. However, current tools are limited when analyzing species with higher sequence diversity or higher taxonomic orders such as genera or families. The Roary ILP Bacterial core Annotation Pipeline (RIBAP) uses an integer linear programming approach to refine gene clusters predicted by Roary for identifying core genes. RIBAP successfully handles the complexity and diversity of Chlamydia, Klebsiella, Brucella, and Enterococcus genomes, outperforming other established and recent pangenome tools for identifying all-encompassing core genes at the genus level. RIBAP is a freely available Nextflow pipeline at github.com/hoelzer-lab/ribap and zenodo.org/doi/10.5281/zenodo.10890871.},
  keywords  = {annotation, bacteria, DNA / genomics, evolution, software},
  pubstate  = {published},
  tppubtype = {article}
}
Collatz, Maximilian; Reinicke, Martin; Diezel, Celia; Braun, Sascha D.; Monecke, Stefan; Reissig, Annett; Ehricht, Ralf
In: BioMedInformatics, vol. 4, 2024.
Abstract | Links | BibTeX | Tags: bacteria, DNA / genomics, software
@article{nokey_93,
  title     = {{ConsensusPrime}—A Bioinformatic Pipeline for Efficient Consensus Primer Design—Detection of Various Resistance and Virulence Factors in {MRSA}—A Case Study},
  author    = {Maximilian Collatz and Martin Reinicke and Celia Diezel and Sascha D. Braun and Stefan Monecke and Annett Reissig and Ralf Ehricht},
  doi       = {10.3390/biomedinformatics4020068},
  year      = {2024},
  date      = {2024-05-10},
  urldate   = {2024-05-10},
  journal   = {BioMedInformatics},
  volume    = {4},
  abstract  = {Background: The effectiveness and reliability of diagnostic tests that detect DNA sequences largely hinge on the quality of the used primers and probes. This importance is especially evident when considering the specific sample being analyzed, as it affects the molecular background and potential for cross-reactivity, ultimately determining the test’s performance.
Methods: Predicting primers based on the consensus sequence of the target has multiple advantages, including high specificity, diagnostic reliability, broad applicability, and long-term validity. Automated curation of the input sequences ensures high-quality primers and probes.
Results: Here, we present a use case for developing a set of consensus primers and probes to identify antibiotic resistance and virulence genes in Staphylococcus (S.) aureus using the ConsensusPrime pipeline. Extensive qPCR experiments with several S. aureus strains confirm the exceptional quality of the primers designed using the pipeline.
Conclusions: By improving the quality of the input sequences and using the consensus sequence as a basis, the ConsensusPrime pipeline ensures high-quality primers and probes, which should be the basis of molecular assays.},
  keywords  = {bacteria, DNA / genomics, software},
  pubstate  = {published},
  tppubtype = {article}
}
Methods: Predicting primers based on the consensus sequence of the target has multiple advantages, including high specificity, diagnostic reliability, broad applicability, and long-term validity. Automated curation of the input sequences ensures high-quality primers and probes.
Results: Here, we present a use case for developing a set of consensus primers and probes to identify antibiotic resistance and virulence genes in Staphylococcus (S.) aureus using the ConsensusPrime pipeline. Extensive qPCR experiments with several S. aureus strains confirm the exceptional quality of the primers designed using the pipeline.
Conclusions: By improving the quality of the input sequences and using the consensus sequence as a basis, the ConsensusPrime pipeline pipeline ensures high-quality primers and probes, which should be the basis of molecular assays.
2023
Hufsky, Franziska; Abecasis, Ana B.; Babaian, Artem; Beck, Sebastian; Brierley, Liam; Dellicour, Simon; Eggeling, Christian; Elena, Santiago F.; Gieraths, Udo; Ha, Anh D.; Harvey, Will; Jones, Terry C.; Lamkiewicz, Kevin; Lovate, Gabriel L.; Lücking, Dominik; Machyna, Martin; Nishimura, Luca; Nocke, Maximilian K.; Renard, Bernard Y.; Sakaguchi, Shoichi; Sakellaridi, Lygeri; Spangenberg, Jannes; Tarradas-Alemany, Maria; Triebel, Sandra; Vakulenko, Yulia; Wijesekara, Rajitha Yasas; González-Candelas, Fernando; Krautwurst, Sarah; Pérez-Cataluña, Alba; Randazzo, Walter; Sánchez, Gloria; Marz, Manja
The International Virus Bioinformatics Meeting 2023 Journal Article
In: Viruses, vol. 15, iss. 10, 2023, ISSN: 1999-4915.
Abstract | Links | BibTeX | Tags: annotation, software, virus host interaction, viruses
@article{nokey_47,
  title     = {The {International Virus Bioinformatics Meeting} 2023},
  author    = {Franziska Hufsky and Ana B. Abecasis and Artem Babaian and Sebastian Beck and Liam Brierley and Simon Dellicour and Christian Eggeling and Santiago F. Elena and Udo Gieraths and Anh D. Ha and Will Harvey and Terry C. Jones and Kevin Lamkiewicz and Gabriel L. Lovate and Dominik Lücking and Martin Machyna and Luca Nishimura and Maximilian K. Nocke and Bernard Y. Renard and Shoichi Sakaguchi and Lygeri Sakellaridi and Jannes Spangenberg and Maria Tarradas-Alemany and Sandra Triebel and Yulia Vakulenko and Rajitha Yasas Wijesekara and Fernando González-Candelas and Sarah Krautwurst and Alba Pérez-Cataluña and Walter Randazzo and Gloria Sánchez and Manja Marz},
  doi       = {10.3390/v15102031},
  issn      = {1999-4915},
  year      = {2023},
  date      = {2023-09-30},
  urldate   = {2023-09-30},
  journal   = {Viruses},
  volume    = {15},
  number    = {10},
  abstract  = {The 2023 International Virus Bioinformatics Meeting was held in Valencia, Spain, from 24–26 May 2023, attracting approximately 180 participants worldwide. The primary objective of the conference was to establish a dynamic scientific environment conducive to discussion, collaboration, and the generation of novel research ideas. As the first in-person event following the SARS-CoV-2 pandemic, the meeting facilitated highly interactive exchanges among attendees. It served as a pivotal gathering for gaining insights into the current status of virus bioinformatics research and engaging with leading researchers and emerging scientists. The event comprised eight invited talks, 19 contributed talks, and 74 poster presentations across eleven sessions spanning three days. Topics covered included machine learning, bacteriophages, virus discovery, virus classification, virus visualization, viral infection, viromics, molecular epidemiology, phylodynamic analysis, RNA viruses, viral sequence analysis, viral surveillance, and metagenomics. This report provides rewritten abstracts of the presentations, a summary of the key research findings, and highlights shared during the meeting.},
  keywords  = {annotation, software, virus host interaction, viruses},
  pubstate  = {published},
  tppubtype = {article}
}
Rangel-Pineros, Guillermo; Almeida, Alexandre; Beracochea, Martin; Sakharova, Ekaterina; Marz, Manja; Muñoz, Alejandro Reyes; Hölzer, Martin; Finn, Robert D.
VIRify: An integrated detection, annotation and taxonomic classification pipeline using virus-specific protein profile hidden Markov models Journal Article
In: PLOS Comput Biol, vol. 19, iss. 8, pp. e1011422, 2023.
Abstract | Links | BibTeX | Tags: annotation, classification, metagenomics, phylogenetics, software, viruses
@article{nokey,
  title     = {{VIRify}: An integrated detection, annotation and taxonomic classification pipeline using virus-specific protein profile hidden {Markov} models},
  author    = {Guillermo Rangel-Pineros and Alexandre Almeida and Martin Beracochea and Ekaterina Sakharova and Manja Marz and Alejandro Reyes Muñoz and Martin Hölzer and Robert D. Finn},
  doi       = {10.1371/journal.pcbi.1011422},
  year      = {2023},
  date      = {2023-08-28},
  journal   = {PLOS Comput Biol},
  volume    = {19},
  number    = {8},
  pages     = {e1011422},
  abstract  = {The study of viral communities has revealed the enormous diversity and impact these biological entities have on various ecosystems. These observations have sparked widespread interest in developing computational strategies that support the comprehensive characterisation of viral communities based on sequencing data. Here we introduce VIRify, a new computational pipeline designed to provide a user-friendly and accurate functional and taxonomic characterisation of viral communities. VIRify identifies viral contigs and prophages from metagenomic assemblies and annotates them using a collection of viral profile hidden Markov models (HMMs). These include our manually-curated profile HMMs, which serve as specific taxonomic markers for a wide range of prokaryotic and eukaryotic viral taxa and are thus used to reliably classify viral contigs. We tested VIRify on assemblies from two microbial mock communities, a large metagenomics study, and a collection of publicly available viral genomic sequences from the human gut. The results showed that VIRify could identify sequences from both prokaryotic and eukaryotic viruses, and provided taxonomic classifications from the genus to the family rank with an average accuracy of 86.6\%. In addition, VIRify allowed the detection and taxonomic classification of a range of prokaryotic and eukaryotic viruses present in 243 marine metagenomic assemblies. Finally, the use of VIRify led to a large expansion in the number of taxonomically classified human gut viral sequences and the improvement of outdated and shallow taxonomic classifications. Overall, we demonstrate that VIRify is a novel and powerful resource that offers an enhanced capability to detect a broad range of viral contigs and taxonomically classify them.},
  keywords  = {annotation, classification, metagenomics, phylogenetics, software, viruses},
  pubstate  = {published},
  tppubtype = {article}
}
Spangenberg, Jannes; zu Siederdissen, Christian Höner; Žarković, Milena; Triebel, Sandra; Rose, Ruben; Christophersen, Christina Martínez; Paltzow, Lea; Hegab, Mohsen M.; Wansorra, Anna; Srivastava, Akash; Krumbholz, Andi; Marz, Manja
Magnipore: Prediction of differential single nucleotide changes in the Oxford Nanopore Technologies sequencing signal of SARS-CoV-2 samples Journal Article
In: bioRxiv, 2023.
Abstract | Links | BibTeX | Tags: coronavirus, nanopore, nucleic acid modifications, RNA / transcriptomics, software, viruses
@article{Spangenberg2023Magnipore,
  title     = {{Magnipore}: Prediction of differential single nucleotide changes in the {Oxford Nanopore Technologies} sequencing signal of {SARS-CoV-2} samples},
  author    = {Jannes Spangenberg and Christian {Höner zu Siederdissen} and Milena Žarković and Sandra Triebel and Ruben Rose and Christina Martínez Christophersen and Lea Paltzow and Mohsen M. Hegab and Anna Wansorra and Akash Srivastava and Andi Krumbholz and Manja Marz},
  doi       = {10.1101/2023.03.17.533105},
  year      = {2023},
  date      = {2023-03-17},
  urldate   = {2023-03-17},
  journal   = {bioRxiv},
  abstract  = {Oxford Nanopore Technologies (ONT) allows direct sequencing of ribonucleic acids (RNA) and, in addition, detection of possible RNA modifications due to deviations from the expected ONT signal. The software available so far for this purpose can only detect a small number of modifications. Alternatively, two samples can be compared for different RNA modifications. We present Magnipore, a novel tool to search for significant signal shifts between samples of Oxford Nanopore data from similar or related species. Magnipore classifies them into mutations and potential modifications. We use Magnipore to compare SARS-CoV-2 samples. Included were representatives of the early 2020s Pango lineages (n=6), samples from Pango lineages B.1.1.7 (n=2, Alpha), B.1.617.2 (n=1, Delta), and B.1.529 (n=7, Omicron). Magnipore utilizes position-wise Gaussian distribution models and a comprehensible significance threshold to find differential signals. In the case of Alpha and Delta, Magnipore identifies 55 detected mutations and 15 sites that hint at differential modifications. We predicted potential virus-variant and variant-group-specific differential modifications. Magnipore contributes to advancing RNA modification analysis in the context of viruses and virus variants.},
  keywords  = {coronavirus, nanopore, nucleic acid modifications, RNA / transcriptomics, software, viruses},
  pubstate  = {published},
  tppubtype = {article}
}
2022
Collatz, Maximilian; Braun, Sascha D.; Monecke, Stefan; Ehricht, Ralf
ConsensusPrime—A Bioinformatic Pipeline for Ideal Consensus Primer Design Journal Article
In: BioMedInformatics, vol. 2, 2022.
Abstract | Links | BibTeX | Tags: alignment, DNA / genomics, software
@article{nokey_91,
  title     = {{ConsensusPrime}—A Bioinformatic Pipeline for Ideal Consensus Primer Design},
  author    = {Maximilian Collatz and Sascha D. Braun and Stefan Monecke and Ralf Ehricht},
  doi       = {10.3390/biomedinformatics2040041},
  year      = {2022},
  date      = {2022-11-24},
  urldate   = {2022-11-24},
  journal   = {BioMedInformatics},
  volume    = {2},
  abstract  = {Background: High-quality oligonucleotides for molecular amplification and detection procedures of diverse target sequences depend on sequence homology. Processing input sequences and identifying homogeneous regions in alignments can be carried out by hand only if they are small and contain sequences of high similarity. Finding the best regions for large and inhomogeneous alignments needs to be automated.
Results: The ConsensusPrime pipeline was developed to sort out redundant and technical interfering data in multiple sequence alignments and detect the most homologous regions from multiple sequences. It automates the prediction of optimal consensus primers for molecular analytical and sequence-based procedures/assays.
Conclusion: ConsensusPrime is a fast and easy-to-use pipeline for predicting optimal consensus primers that is executable on local systems without depending on external resources and web services. An implementation in a Docker image ensures platform-independent executability and installability despite the combination of multiple programs. The source code and installation instructions are publicly available on GitHub.},
  keywords  = {alignment, DNA / genomics, software},
  pubstate  = {published},
  tppubtype = {article}
}
Results: The ConsensusPrime pipeline was developed to sort out redundant and technical interfering data in multiple sequence alignments and detect the most homologous regions from multiple sequences. It automates the prediction of optimal consensus primers for molecular analytical and sequence-based procedures/assays.
Conclusion: ConsensusPrime is a fast and easy-to-use pipeline for predicting optimal consensus primers that is executable on local systems without depending on external resources and web services. An implementation in a Docker image ensures platform-independent executability and installability despite the combination of multiple programs. The source code and installation instructions are publicly available on GitHub.
Hufsky, Franziska; Beslic, Denis; Boeckaerts, Dimitri; Duchene, Sebastian; González-Tortuero, Enrique; Gruber, Andreas J; Guo, Jiarong; Jansen, Daan; Juma, John; Kongkitimanon, Kunaphas; Luque, Antoni; Ritsch, Muriel; Lovate, Gabriel L.; Nishimura, Luca; Pas, Célia; Domingo, Esteban; Hodcroft, Emma; Lemey, Philippe; Sullivan, Matthew B; Weber, Friedemann; González-Candelas, Fernando; Krautwurst, Sarah; Pérez-Cataluña, Alba; Randazzo, Walter; Sánchez, Gloria; Marz, Manja
The International Virus Bioinformatics Meeting 2022 Journal Article
In: Viruses, vol. 14, iss. 5, pp. 973, 2022.
Abstract | Links | BibTeX | Tags: annotation, software, virus host interaction, viruses
@article{Hufsky2022,
  title     = {The {International Virus Bioinformatics Meeting} 2022},
  author    = {Franziska Hufsky and Denis Beslic and Dimitri Boeckaerts and Sebastian Duchene and Enrique González-Tortuero and Andreas J. Gruber and Jiarong Guo and Daan Jansen and John Juma and Kunaphas Kongkitimanon and Antoni Luque and Muriel Ritsch and Gabriel L. Lovate and Luca Nishimura and Célia Pas and Esteban Domingo and Emma Hodcroft and Philippe Lemey and Matthew B. Sullivan and Friedemann Weber and Fernando González-Candelas and Sarah Krautwurst and Alba Pérez-Cataluña and Walter Randazzo and Gloria Sánchez and Manja Marz},
  doi       = {10.3390/v14050973},
  year      = {2022},
  date      = {2022-05-05},
  urldate   = {2022-05-05},
  journal   = {Viruses},
  volume    = {14},
  number    = {5},
  pages     = {973},
  abstract  = {The International Virus Bioinformatics Meeting 2022 took place online, on 23-25 March 2022, and has attracted about 380 participants from all over the world. The goal of the meeting was to provide a meaningful and interactive scientific environment to promote discussion and collaboration and to inspire and suggest new research directions and questions. The participants created a highly interactive scientific environment even without physical face-to-face interactions. This meeting is a focal point to gain an insight into the state-of-the-art of the virus bioinformatics research landscape and to interact with researchers in the forefront as well as aspiring young scientists. The meeting featured eight invited and 18 contributed talks in eight sessions on three days, as well as 52 posters, which were presented during three virtual poster sessions. The main topics were: SARS-CoV-2, viral emergence and surveillance, virus-host interactions, viral sequence analysis, virus identification and annotation, phages, and viral diversity. This report summarizes the main research findings and highlights presented at the meeting.},
  keywords  = {annotation, software, virus host interaction, viruses},
  pubstate  = {published},
  tppubtype = {article}
}
Hufsky, Franziska; Marz, Manja
Gib mir den Virus und ich sag dir den Wirt Journal Article
In: BIOSpektrum, vol. 28, pp. 225–226, 2022.
Links | BibTeX | Tags: software, virus host interaction, viruses
@article{Hufsky2022GibMirDenVirus,
  title     = {Gib mir den {Virus} und ich sag dir den {Wirt}},
  author    = {Franziska Hufsky and Manja Marz},
  doi       = {10.1007/s12268-022-1732-7},
  year      = {2022},
  date      = {2022-03-28},
  journal   = {BIOSpektrum},
  volume    = {28},
  pages     = {225--226},
  keywords  = {software, virus host interaction, viruses},
  pubstate  = {published},
  tppubtype = {article}
}
2021
Brandt, Christian; Krautwurst, Sebastian; Spott, Riccardo; Lohde, Mara; Jundzill, Mateusz; Marquet, Mike; Hölzer, Martin
poreCov - An Easy to Use, Fast, and Robust Workflow for SARS-CoV-2 Genome Reconstruction via Nanopore Sequencing Journal Article
In: Front Genet, vol. 12, pp. 711437, 2021.
Abstract | Links | BibTeX | Tags: coronavirus, nanopore, RNA / transcriptomics, software, viruses
@article{Brandt2021,
  title     = {{poreCov} - An Easy to Use, Fast, and Robust Workflow for {SARS-CoV-2} Genome Reconstruction via Nanopore Sequencing},
  author    = {Christian Brandt and Sebastian Krautwurst and Riccardo Spott and Mara Lohde and Mateusz Jundzill and Mike Marquet and Martin Hölzer},
  url       = {https://github.com/replikation/poreCov},
  doi       = {10.3389/fgene.2021.711437},
  year      = {2021},
  date      = {2021-07-28},
  urldate   = {2021-07-28},
  journal   = {Front Genet},
  volume    = {12},
  pages     = {711437},
  abstract  = {In response to the SARS-CoV-2 pandemic, a highly increased sequencing effort has been established worldwide to track and trace ongoing viral evolution. Technologies, such as nanopore sequencing via the ARTIC protocol are used to reliably generate genomes from raw sequencing data as a crucial base for molecular surveillance. However, for many labs that perform SARS-CoV-2 sequencing, bioinformatics is still a major bottleneck, especially if hundreds of samples need to be processed in a recurring fashion. Pipelines developed for short-read data cannot be applied to nanopore data. Therefore, specific long-read tools and parameter settings need to be orchestrated to enable accurate genotyping and robust reference-based genome reconstruction of SARS-CoV-2 genomes from nanopore data. Here we present poreCov, a highly parallel workflow written in Nextflow, using containers to wrap all the tools necessary for a routine SARS-CoV-2 sequencing lab into one program. The ease of installation, combined with concise summary reports that clearly highlight all relevant information, enables rapid and reliable analysis of hundreds of SARS-CoV-2 raw sequence data sets or genomes. poreCov is freely available on GitHub under the GNUv3 license: github.com/replikation/poreCov.},
  keywords  = {coronavirus, nanopore, RNA / transcriptomics, software, viruses},
  pubstate  = {published},
  tppubtype = {article}
}
Damme, Renaud Van; Hölzer, Martin; Viehweger, Adrian; Müller, Bettina; Bongcam-Rudloff, Erik; Brandt, Christian
Metagenomics workflow for hybrid assembly, differential coverage binning, metatranscriptomics and pathway analysis (MUFFIN) Journal Article
In: PLOS Comput Biol, vol. 17, no. 2, pp. e1008716, 2021.
Abstract | Links | BibTeX | Tags: annotation, assembly, classification, DNA / genomics, metagenomics, RNA / transcriptomics, software
@article{VanDamme:21,
title = {Metagenomics workflow for hybrid assembly, differential coverage binning, metatranscriptomics and pathway analysis (MUFFIN)},
author = {Van Damme, Renaud and Martin Hölzer and Adrian Viehweger and Bettina Müller and Erik Bongcam-Rudloff and Christian Brandt},
editor = {Mihaela Pertea},
url = {https://github.com/RVanDamme/MUFFIN},
doi = {10.1371/journal.pcbi.1008716},
year = {2021},
date = {2021-02-09},
urldate = {2021-02-09},
journal = {PLOS Comput Biol},
volume = {17},
number = {2},
pages = {e1008716},
publisher = {Public Library of Science (PLoS)},
abstract = {Metagenomics has redefined many areas of microbiology. However, metagenome-assembled genomes (MAGs) are often fragmented, primarily when sequencing was performed with short reads. Recent long-read sequencing technologies promise to improve genome reconstruction. However, the integration of two different sequencing modalities makes downstream analyses complex. We, therefore, developed MUFFIN, a complete metagenomic workflow that uses short and long reads to produce high-quality bins and their annotations. The workflow is written by using Nextflow, a workflow orchestration software, to achieve high reproducibility and fast and straightforward use. This workflow also produces the taxonomic classification and KEGG pathways of the bins and can be further used for quantification and annotation by providing RNA-Seq data (optionally). We tested the workflow using twenty biogas reactor samples and assessed the capacity of MUFFIN to process and output relevant files needed to analyze the microbial community and their function. MUFFIN produces functional pathway predictions and, if provided de novo metatranscript annotations across the metagenomic sample and for each bin. MUFFIN is available on github under GNUv3 licence: https://github.com/RVanDamme/MUFFIN.},
keywords = {annotation, assembly, classification, DNA / genomics, metagenomics, RNA / transcriptomics, software},
pubstate = {published},
tppubtype = {article}
}
2020
Lataretu, Marie; Hölzer, Martin
RNAflow: An Effective and Simple RNA-Seq Differential Gene Expression Pipeline Using Nextflow Journal Article
In: Genes, vol. 11, no. 12, pp. 1487, 2020.
Abstract | Links | BibTeX | Tags: differential expression analysis, RNA / transcriptomics, software
@article{Lataretu:20,
  author    = {Marie Lataretu and Martin Hölzer},
  title     = {RNAflow: An Effective and Simple RNA-Seq Differential Gene Expression Pipeline Using Nextflow},
  journal   = {Genes},
  volume    = {11},
  number    = {12},
  pages     = {1487},
  publisher = {MDPI AG},
  year      = {2020},
  date      = {2020-12-10},
  doi       = {10.3390/genes11121487},
  url       = {https://github.com/hoelzer-lab/rnaflow},
  urldate   = {2020-01-01},
  keywords  = {differential expression analysis, RNA / transcriptomics, software},
  abstract  = {RNA-Seq enables the identification and quantification of RNA molecules, often with the aim of detecting differentially expressed genes (DEGs). Although RNA-Seq evolved into a standard technique, there is no universal gold standard for these data’s computational analysis. On top of that, previous studies proved the irreproducibility of RNA-Seq studies. Here, we present a portable, scalable, and parallelizable Nextflow RNA-Seq pipeline to detect DEGs, which assures a high level of reproducibility. The pipeline automatically takes care of common pitfalls, such as ribosomal RNA removal and low abundance gene filtering. Apart from various visualizations for the DEG results, we incorporated downstream pathway analysis for common species as Homo sapiens and Mus musculus. We evaluated the DEG detection functionality while using qRT-PCR data serving as a reference and observed a very high correlation of the logarithmized gene expression fold changes.},
  pubstate  = {published},
  tppubtype = {article}
}
Hufsky, Franziska; Beerenwinkel, Niko; Meyer, Irmtraud M.; Roux, Simon; Cook, Georgia May; Kinsella, Cormac M.; Lamkiewicz, Kevin; Marquet, Mike; Nieuwenhuijse, David F.; Olendraite, Ingrida; Paraskevopoulou, Sofia; Young, Francesca; Dijkman, Ronald; Ibrahim, Bashar; Kelly, Jenna; Le Mercier, Philippe; Marz, Manja; Ramette, Alban; Thiel, Volker
The International Virus Bioinformatics Meeting 2020 Journal Article
In: Viruses, vol. 12, no. 12, pp. 1398, 2020.
Abstract | Links | BibTeX | Tags: classification, conference report, evolution, metagenomics, software, viruses
@article{Hufsky:20b,
title = {The International Virus Bioinformatics Meeting 2020},
author = {Franziska Hufsky and Niko Beerenwinkel and Irmtraud M. Meyer and Simon Roux and Georgia May Cook and Cormac M. Kinsella and Kevin Lamkiewicz and Mike Marquet and David F. Nieuwenhuijse and Ingrida Olendraite and Sofia Paraskevopoulou and Francesca Young and Ronald Dijkman and Bashar Ibrahim and Jenna Kelly and Le Mercier, Philippe and Manja Marz and Alban Ramette and Volker Thiel},
doi = {10.3390/v12121398},
year = {2020},
date = {2020-12-06},
urldate = {2020-01-01},
journal = {Viruses},
volume = {12},
number = {12},
pages = {1398},
publisher = {MDPI AG},
abstract = {The International Virus Bioinformatics Meeting 2020 was originally planned to take place in Bern, Switzerland, in March 2020. However, the COVID-19 pandemic put a spoke in the wheel of almost all conferences to be held in 2020. After moving the conference to 8–9 October 2020, we got hit by the second wave and finally decided at short notice to go fully online. On the other hand, the pandemic has made us even more aware of the importance of accelerating research in viral bioinformatics. Advances in bioinformatics have led to improved approaches to investigate viral infections and outbreaks. The International Virus Bioinformatics Meeting 2020 has attracted approximately 120 experts in virology and bioinformatics from all over the world to join the two-day virtual meeting. Despite concerns being raised that virtual meetings lack possibilities for face-to-face discussion, the participants from this small community created a highly interactive scientific environment, engaging in lively and inspiring discussions and suggesting new research directions and questions. The meeting featured five invited and twelve contributed talks, on the four main topics: (1) proteome and RNAome of RNA viruses, (2) viral metagenomics and ecology, (3) virus evolution and classification and (4) viral infections and immunology. Further, the meeting featured 20 oral poster presentations, all of which focused on specific areas of virus bioinformatics. This report summarizes the main research findings and highlights presented at the meeting.},
keywords = {classification, conference report, evolution, metagenomics, software, viruses},
pubstate = {published},
tppubtype = {article}
}
Kalvari, Ioanna; Nawrocki, Eric P; Ontiveros-Palacios, Nancy; Argasinska, Joanna; Lamkiewicz, Kevin; Marz, Manja; Griffiths-Jones, Sam; Toffano-Nioche, Claire; Gautheret, Daniel; Weinberg, Zasha; Rivas, Elena; Eddy, Sean R; Finn, Robert D; Bateman, Alex; Petrov, Anton I
Rfam 14: expanded coverage of metagenomic, viral and microRNA families Journal Article
In: Nucleic Acids Res, vol. 49, no. D1, pp. D192–D200, 2020.
Abstract | Links | BibTeX | Tags: alignment, annotation, bacteria, coronavirus, database, metagenomics, ncRNAs, RNA / transcriptomics, software, viruses
@article{Kalvari:21,
  author    = {Ioanna Kalvari and Eric P Nawrocki and Nancy Ontiveros-Palacios and Joanna Argasinska and Kevin Lamkiewicz and Manja Marz and Sam Griffiths-Jones and Claire Toffano-Nioche and Daniel Gautheret and Zasha Weinberg and Elena Rivas and Sean R Eddy and Robert D Finn and Alex Bateman and Anton I Petrov},
  title     = {Rfam 14: expanded coverage of metagenomic, viral and microRNA families},
  journal   = {Nucleic Acids Res},
  volume    = {49},
  number    = {D1},
  pages     = {D192--D200},
  publisher = {Oxford University Press (OUP)},
  year      = {2020},
  date      = {2020-11-19},
  doi       = {10.1093/nar/gkaa1047},
  url       = {https://rfam.org/},
  urldate   = {2020-11-19},
  keywords  = {alignment, annotation, bacteria, coronavirus, database, metagenomics, ncRNAs, RNA / transcriptomics, software, viruses},
  abstract  = {Rfam is a database of RNA families where each of the 3444 families is represented by a multiple sequence alignment of known RNA sequences and a covariance model that can be used to search for additional members of the family. Recent developments have involved expert collaborations to improve the quality and coverage of Rfam data, focusing on microRNAs, viral and bacterial RNAs. We have completed the first phase of synchronising microRNA families in Rfam and miRBase, creating 356 new Rfam families and updating 40. We established a procedure for comprehensive annotation of viral RNA families starting with Flavivirus and Coronaviridae RNAs. We have also increased the coverage of bacterial and metagenome-based RNA families from the ZWD database. These developments have enabled a significant growth of the database, with the addition of 759 new families in Rfam 14. To facilitate further community contribution to Rfam, expert users are now able to build and submit new families using the newly developed Rfam Cloud family curation system. New Rfam website features include a new sequence similarity search powered by RNAcentral, as well as search and visualisation of families with pseudoknots. Rfam is freely available at https://rfam.org.},
  pubstate  = {published},
  tppubtype = {article}
}
Hufsky, Franziska; Lamkiewicz, Kevin; Almeida, Alexandre; Aouacheria, Abdel; Arighi, Cecilia; Bateman, Alex; Baumbach, Jan; Beerenwinkel, Niko; Brandt, Christian; Cacciabue, Marco; Chuguransky, Sara; Drechsel, Oliver; Finn, Robert D; Fritz, Adrian; Fuchs, Stephan; Hattab, Georges; Hauschild, Anne-Christin; Heider, Dominik; Hoffmann, Marie; Hölzer, Martin; Hoops, Stefan; Kaderali, Lars; Kalvari, Ioanna; Kleist, Max; Kmiecinski, Renó; Kühnert, Denise; Lasso, Gorka; Libin, Pieter; List, Markus; Löchel, Hannah F; Martin, Maria J; Martin, Roman; Matschinske, Julian; McHardy, Alice C; Mendes, Pedro; Mistry, Jaina; Navratil, Vincent; Nawrocki, Eric P; O'Toole, Áine Niamh; Ontiveros-Palacios, Nancy; Petrov, Anton I; Rangel-Pineros, Guillermo; Redaschi, Nicole; Reimering, Susanne; Reinert, Knut; Reyes, Alejandro; Richardson, Lorna; Robertson, David L; Sadegh, Sepideh; Singer, Joshua B; Theys, Kristof; Upton, Chris; Welzel, Marius; Williams, Lowri; Marz, Manja
Computational strategies to combat COVID-19: useful tools to accelerate SARS-CoV-2 and coronavirus research Journal Article
In: Brief Bioinform, vol. 22, no. 2, pp. 642–663, 2020.
Abstract | Links | BibTeX | Tags: coronavirus, evolution, review, software, viruses
@article{Hufsky:20a,
  author    = {Franziska Hufsky and Kevin Lamkiewicz and Alexandre Almeida and Abdel Aouacheria and Cecilia Arighi and Alex Bateman and Jan Baumbach and Niko Beerenwinkel and Christian Brandt and Marco Cacciabue and Sara Chuguransky and Oliver Drechsel and Robert D Finn and Adrian Fritz and Stephan Fuchs and Georges Hattab and Anne-Christin Hauschild and Dominik Heider and Marie Hoffmann and Martin Hölzer and Stefan Hoops and Lars Kaderali and Ioanna Kalvari and Max Kleist and Renó Kmiecinski and Denise Kühnert and Gorka Lasso and Pieter Libin and Markus List and Hannah F Löchel and Maria J Martin and Roman Martin and Julian Matschinske and Alice C McHardy and Pedro Mendes and Jaina Mistry and Vincent Navratil and Eric P Nawrocki and Áine Niamh O'Toole and Nancy Ontiveros-Palacios and Anton I Petrov and Guillermo Rangel-Pineros and Nicole Redaschi and Susanne Reimering and Knut Reinert and Alejandro Reyes and Lorna Richardson and David L Robertson and Sepideh Sadegh and Joshua B Singer and Kristof Theys and Chris Upton and Marius Welzel and Lowri Williams and Manja Marz},
  title     = {Computational strategies to combat COVID-19: useful tools to accelerate SARS-CoV-2 and coronavirus research},
  journal   = {Brief Bioinform},
  volume    = {22},
  number    = {2},
  pages     = {642--663},
  publisher = {Oxford University Press (OUP)},
  year      = {2020},
  date      = {2020-11-04},
  doi       = {10.1093/bib/bbaa232},
  url       = {http://evbc.uni-jena.de/tools/coronavirus-tools/},
  urldate   = {2020-11-04},
  keywords  = {coronavirus, evolution, review, software, viruses},
  abstract  = {SARS-CoV-2 (severe acute respiratory syndrome coronavirus 2) is a novel virus of the family Coronaviridae. The virus causes the infectious disease COVID-19. The biology of coronaviruses has been studied for many years. However, bioinformatics tools designed explicitly for SARS-CoV-2 have only recently been developed as a rapid reaction to the need for fast detection, understanding and treatment of COVID-19. To control the ongoing COVID-19 pandemic, it is of utmost importance to get insight into the evolution and pathogenesis of the virus. In this review, we cover bioinformatics workflows and tools for the routine detection of SARS-CoV-2 infection, the reliable analysis of sequencing data, the tracking of the COVID-19 pandemic and evaluation of containment measures, the study of coronavirus evolution, the discovery of potential drug targets and development of therapeutic strategies. For each tool, we briefly describe its use case and how it advances research specifically for SARS-CoV-2. All tools are free to use and available online, either through web applications or public code repositories.},
  pubstate  = {published},
  tppubtype = {article}
}
Collatz, Maximilian; Mock, Florian; Barth, Emanuel; Hölzer, Martin; Sachse, Konrad; Marz, Manja
EpiDope: A Deep Neural Network for linear B-cell epitope prediction Journal Article
In: Bioinformatics, vol. 37, no. 4, pp. 448–455, 2020.
Abstract | Links | BibTeX | Tags: machine learning, software, virus host interaction, viruses
@article{Collatz:20,
title = {EpiDope: A Deep Neural Network for linear B-cell epitope prediction},
author = {Maximilian Collatz and Florian Mock and Emanuel Barth and Martin Hölzer and Konrad Sachse and Manja Marz},
editor = {Lenore Cowen},
url = {https://github.com/rnajena/EpiDope},
doi = {10.1093/bioinformatics/btaa773},
year = {2020},
date = {2020-09-11},
urldate = {2020-09-11},
journal = {Bioinformatics},
volume = {37},
number = {4},
pages = {448--455},
publisher = {Oxford University Press (OUP)},
abstract = {By binding to specific structures on antigenic proteins, the so-called epitopes, B-cell antibodies can neutralize pathogens. The identification of B-cell epitopes is of great value for the development of specific serodiagnostic assays and the optimization of medical therapy. However, identifying diagnostically or therapeutically relevant epitopes is a challenging task that usually involves extensive laboratory work. In this study, we show that the time, cost and labor-intensive process of epitope detection in the lab can be significantly reduced using in silico prediction.
Here, we present EpiDope, a python tool which uses a deep neural network to detect linear B-cell epitope regions on individual protein sequences. With an area under the curve between 0.67 ± 0.07 in the receiver operating characteristic curve, EpiDope exceeds all other currently used linear B-cell epitope prediction tools. Our software is shown to reliably predict linear B-cell epitopes of a given protein sequence, thus contributing to a significant reduction of laboratory experiments and costs required for the conventional approach.},
keywords = {machine learning, software, virus host interaction, viruses},
pubstate = {published},
tppubtype = {article}
}
Here, we present EpiDope, a python tool which uses a deep neural network to detect linear B-cell epitope regions on individual protein sequences. With an area under the curve between 0.67 ± 0.07 in the receiver operating characteristic curve, EpiDope exceeds all other currently used linear B-cell epitope prediction tools. Our software is shown to reliably predict linear B-cell epitopes of a given protein sequence, thus contributing to a significant reduction of laboratory experiments and costs required for the conventional approach.
Mock, Florian; Viehweger, Adrian; Barth, Emanuel; Marz, Manja
VIDHOP, viral host prediction with Deep Learning Journal Article
In: Bioinformatics, vol. 37, no. 3, pp. 318–325, 2020.
Abstract | Links | BibTeX | Tags: machine learning, software, virus host interaction, viruses
@article{Mock:20,
title = {VIDHOP, viral host prediction with Deep Learning},
author = {Florian Mock and Adrian Viehweger and Emanuel Barth and Manja Marz},
editor = {Jinbo Xu},
url = {https://github.com/rnajena/vidhop},
doi = {10.1093/bioinformatics/btaa705},
year = {2020},
date = {2020-08-10},
urldate = {2020-08-10},
journal = {Bioinformatics},
volume = {37},
number = {3},
pages = {318--325},
publisher = {Oxford University Press (OUP)},
abstract = {Zoonosis, the natural transmission of infections from animals to humans, is a far-reaching global problem. The recent outbreaks of Zikavirus, Ebolavirus and Coronavirus are examples of viral zoonosis, which occur more frequently due to globalization. In case of a virus outbreak, it is helpful to know which host organism was the original carrier of the virus to prevent further spreading of viral infection. Recent approaches aim to predict a viral host based on the viral genome, often in combination with the potential host genome and arbitrarily selected features. These methods are limited in the number of different hosts they can predict or the accuracy of the prediction.
Here, we present a fast and accurate deep learning approach for viral host prediction, which is based on the viral genome sequence only. We tested our deep neural network (DNN) on three different virus species (influenza A virus, rabies lyssavirus and rotavirus A). We achieved for each virus species an AUC between 0.93 and 0.98, allowing highly accurate predictions while using only fractions (100–400 bp) of the viral genome sequences. We show that deep neural networks are suitable to predict the host of a virus, even with a limited amount of sequences and highly unbalanced available data. The trained DNNs are the core of our virus–host prediction tool VIrus Deep learning HOst Prediction (VIDHOP). VIDHOP also allows the user to train and use models for other viruses.},
keywords = {machine learning, software, virus host interaction, viruses},
pubstate = {published},
tppubtype = {article}
}
Here, we present a fast and accurate deep learning approach for viral host prediction, which is based on the viral genome sequence only. We tested our deep neural network (DNN) on three different virus species (influenza A virus, rabies lyssavirus and rotavirus A). We achieved for each virus species an AUC between 0.93 and 0.98, allowing highly accurate predictions while using only fractions (100–400 bp) of the viral genome sequences. We show that deep neural networks are suitable to predict the host of a virus, even with a limited amount of sequences and highly unbalanced available data. The trained DNNs are the core of our virus–host prediction tool VIrus Deep learning HOst Prediction (VIDHOP). VIDHOP also allows the user to train and use models for other viruses.
Hölzer, Martin; Marz, Manja
PoSeiDon: a Nextflow pipeline for the detection of evolutionary recombination events and positive selection Journal Article
In: Bioinformatics, vol. 37, no. 7, pp. 1018–1020, 2020.
Abstract | Links | BibTeX | Tags: alignment, evolution, phylogenetics, software
@article{Hoelzer:20a,
title = {PoSeiDon: a Nextflow pipeline for the detection of evolutionary recombination events and positive selection},
author = {Martin Hölzer and Manja Marz},
editor = {Alfonso Valencia},
url = {https://github.com/rnajena/poseidon},
doi = {10.1093/bioinformatics/btaa695},
year = {2020},
date = {2020-07-31},
urldate = {2020-07-31},
journal = {Bioinformatics},
volume = {37},
number = {7},
pages = {1018--1020},
publisher = {Oxford University Press (OUP)},
abstract = {PoSeiDon is an easy-to-use pipeline that helps researchers to find recombination events and sites under positive selection in protein-coding sequences. By entering homologous sequences, PoSeiDon builds an alignment, estimates a best-fitting substitution model and performs a recombination analysis followed by the construction of all corresponding phylogenies. Finally, significantly positive selected sites are detected according to different models for the full alignment and possible recombination fragments. The results of PoSeiDon are summarized in a user-friendly HTML page providing all intermediate results and the graphical representation of recombination events and positively selected sites.
},
keywords = {alignment, evolution, phylogenetics, software},
pubstate = {published},
tppubtype = {article}
}
Hufsky, Franziska; Lamkiewicz, Kevin; Almeida, Alexandre; Aouacheria, Abdel; Arighi, Cecilia; Bateman, Alex; Baumbach, Jan; Beerenwinkel, Niko; Brandt, Christian; Cacciabue, Marco; Chuguransky, Sara; Drechsel, Oliver; Finn, Robert D.; Fritz, Adrian; Fuchs, Stephan; Hattab, Georges; Hauschild, Anne-Christin; Heider, Dominik; Hoffmann, Marie; Hölzer, Martin; Hoops, Stefan; Kaderali, Lars; Kalvari, Ioanna; Kleist, Max; Kmiecinski, Rene; Kühnert, Denise; Lasso, Gorka; Libin, Pieter; List, Markus; Löchel, Hannah F.; Martin, Maria J.; Martin, Roman; Matschinske, Julian; McHardy, Alice C.; Mendes, Pedro; Mistry, Jaina; Navratil, Vincent; Nawrocki, Eric; O'Toole, Áine Niamh; Palacios-Ontiveros, Nancy; Petrov, Anton I.; Rangel-Piñeros, Guillermo; Redaschi, Nicole; Reimering, Susanne; Reinert, Knut; Reyes, Alejandro; Richardson, Lorna; Robertson, David L.; Sadegh, Sepideh; Singer, Joshua B.; Theys, Kristof; Upton, Chris; Welzel, Marius; Williams, Lowri; Marz, Manja
Computational Strategies to Combat COVID-19: Useful Tools to Accelerate SARS-CoV-2 and Coronavirus Research Journal Article
In: Preprints, 2020, (Now published in Brief Bioinform: https://dx.doi.org/10.1093/bib/bbaa232).
Abstract | Links | BibTeX | Tags: coronavirus, evolution, review, software, viruses
@article{Hufsky:20,
  author    = {Franziska Hufsky and Kevin Lamkiewicz and Alexandre Almeida and Abdel Aouacheria and Cecilia Arighi and Alex Bateman and Jan Baumbach and Niko Beerenwinkel and Christian Brandt and Marco Cacciabue and Sara Chuguransky and Oliver Drechsel and Robert D. Finn and Adrian Fritz and Stephan Fuchs and Georges Hattab and Anne-Christin Hauschild and Dominik Heider and Marie Hoffmann and Martin Hölzer and Stefan Hoops and Lars Kaderali and Ioanna Kalvari and Max Kleist and Rene Kmiecinski and Denise Kühnert and Gorka Lasso and Pieter Libin and Markus List and Hannah F. Löchel and Maria J. Martin and Roman Martin and Julian Matschinske and Alice C. McHardy and Pedro Mendes and Jaina Mistry and Vincent Navratil and Eric Nawrocki and Áine Niamh O'Toole and Nancy Palacios-Ontiveros and Anton I. Petrov and Guillermo Rangel-Piñeros and Nicole Redaschi and Susanne Reimering and Knut Reinert and Alejandro Reyes and Lorna Richardson and David L. Robertson and Sepideh Sadegh and Joshua B. Singer and Kristof Theys and Chris Upton and Marius Welzel and Lowri Williams and Manja Marz},
  title     = {Computational Strategies to Combat COVID-19: Useful Tools to Accelerate SARS-CoV-2 and Coronavirus Research},
  journal   = {Preprints},
  publisher = {MDPI AG},
  year      = {2020},
  date      = {2020-05-23},
  doi       = {10.20944/preprints202005.0376.v1},
  urldate   = {2020-05-23},
  note      = {Now published in Brief Bioinform: https://dx.doi.org/10.1093/bib/bbaa232},
  keywords  = {coronavirus, evolution, review, software, viruses},
  abstract  = {SARS-CoV-2 (severe acute respiratory syndrome coronavirus 2) is a novel virus of the family Coronaviridae. The virus causes the infectious disease COVID-19. The biology of coronaviruses has been studied for many years. However, bioinformatics tools designed explicitly for SARS-CoV-2 have only recently been developed as a rapid reaction to the need for fast detection, understanding, and treatment of COVID-19. To control the ongoing COVID-19 pandemic, it is of utmost importance to get insight into the evolution and pathogenesis of the virus. In this review, we cover bioinformatics workflows and tools for the routine detection of SARS-CoV-2 infection, the reliable analysis of sequencing data, the tracking of the COVID-19 pandemic and evaluation of containment measures, the study of coronavirus evolution, the discovery of potential drug targets and development of therapeutic strategies. For each tool, we briefly describe its use case and how it advances research specifically for SARS-CoV-2. All tools are freely available online, either through web applications or public code repositories.
},
  pubstate  = {published},
  tppubtype = {article}
}
Hölzer, Martin; Barf, Lisa-Marie; Lamkiewicz, Kevin; Vorimore, Fabien; Lataretu, Marie; Favaroni, Alison; Schnee, Christiane; Laroucau, Karine; Marz, Manja; Sachse, Konrad
Comparative Genome Analysis of 33 Chlamydia Strains Reveals Characteristic Features of Chlamydia Psittaci and Closely Related Species Journal Article
In: Pathogens, vol. 9, no. 11, pp. 899, 2020.
Abstract | Links | BibTeX | Tags: annotation, bacteria, DNA / genomics, software
@article{Hölzer:20,
title = {Comparative Genome Analysis of 33 \textit{Chlamydia} Strains Reveals Characteristic Features of \textit{Chlamydia Psittaci} and Closely Related Species},
author = {Martin Hölzer and Lisa-Marie Barf and Kevin Lamkiewicz and Fabien Vorimore and Marie Lataretu and Alison Favaroni and Christiane Schnee and Karine Laroucau and Manja Marz and Konrad Sachse},
url = {https://github.com/hoelzer-lab/ribap},
doi = {10.3390/pathogens9110899},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
journal = {Pathogens},
volume = {9},
number = {11},
pages = {899},
publisher = {MDPI AG},
abstract = {To identify genome-based features characteristic of the avian and human pathogen Chlamydia (C.) psittaci and related chlamydiae, we analyzed whole-genome sequences of 33 strains belonging to 12 species. Using a novel genome analysis tool termed Roary ILP Bacterial Annotation Pipeline (RIBAP), this panel of strains was shown to share a large core genome comprising 784 genes and representing approximately 80% of individual genomes. Analyzing the most variable genomic sites, we identified a set of features of C. psittaci that in its entirety is characteristic of this species: (i) a relatively short plasticity zone of less than 30,000 nt without a tryptophan operon (also in C. abortus, C. avium, C. gallinacea, C. pneumoniae), (ii) a characteristic set of of Inc proteins comprising IncA, B, C, V, X, Y (with homologs in C. abortus, C. caviae and C. felis as closest relatives), (iii) a 502-aa SinC protein, the largest among Chlamydia spp., and (iv) an elevated number of Pmp proteins of subtype G (14 in C. psittaci, 14 in Cand. C. ibidis). In combination with future functional studies, the common and distinctive criteria revealed in this study provide important clues for understanding the complexity of host-specific behavior of individual Chlamydia spp.},
keywords = {annotation, bacteria, DNA / genomics, software},
pubstate = {published},
tppubtype = {article}
}
2019
Hufsky, Franziska; Ibrahim, Bashar; Modha, Sejal; Clokie, Martha R. J.; Deinhardt-Emmer, Stefanie; Dutilh, Bas E.; Lycett, Samantha; Simmonds, Peter; Thiel, Volker; Abroi, Aare; Adriaenssens, Evelien M.; Escalera-Zamudio, Marina; Kelly, Jenna Nicole; Lamkiewicz, Kevin; Lu, Lu; Susat, Julian; Sicheritz, Thomas; Robertson, David L.; Marz, Manja
The Third Annual Meeting of the European Virus Bioinformatics Center Journal Article
In: Viruses, vol. 11, no. 5, pp. 420, 2019.
Abstract | Links | BibTeX | Tags: classification, conference report, evolution, metagenomics, software, virus host interaction, viruses
@article{Hufsky:19,
  author    = {Franziska Hufsky and Bashar Ibrahim and Sejal Modha and Martha R. J. Clokie and Stefanie Deinhardt-Emmer and Bas E. Dutilh and Samantha Lycett and Peter Simmonds and Volker Thiel and Aare Abroi and Evelien M. Adriaenssens and Marina Escalera-Zamudio and Jenna Nicole Kelly and Kevin Lamkiewicz and Lu Lu and Julian Susat and Thomas Sicheritz and David L. Robertson and Manja Marz},
  title     = {The Third Annual Meeting of the European Virus Bioinformatics Center},
  journal   = {Viruses},
  volume    = {11},
  number    = {5},
  pages     = {420},
  publisher = {MDPI AG},
  year      = {2019},
  date      = {2019-05-05},
  doi       = {10.3390/v11050420},
  urldate   = {2019-05-05},
  keywords  = {classification, conference report, evolution, metagenomics, software, virus host interaction, viruses},
  abstract  = {The Third Annual Meeting of the European Virus Bioinformatics Center (EVBC) took place in Glasgow, United Kingdom, 28–29 March 2019. Virus bioinformatics has become central to virology research, and advances in bioinformatics have led to improved approaches to investigate viral infections and outbreaks, being successfully used to detect, control, and treat infections of humans and animals. This active field of research has attracted approximately 110 experts in virology and bioinformatics/computational biology from Europe and other parts of the world to attend the two-day meeting in Glasgow to increase scientific exchange between laboratory- and computer-based researchers. The meeting was held at the McIntyre Building of the University of Glasgow; a perfect location, as it was originally built to be a place for “rubbing your brains with those of other people”, as Rector Stanley Baldwin described it. The goal of the meeting was to provide a meaningful and interactive scientific environment to promote discussion and collaboration and to inspire and suggest new research directions and questions. The meeting featured eight invited and twelve contributed talks, on the four main topics: (1) systems virology, (2) virus-host interactions and the virome, (3) virus classification and evolution and (4) epidemiology, surveillance and evolution. Further, the meeting featured 34 oral poster presentations, all of which focused on specific areas of virus bioinformatics. This report summarizes the main research findings and highlights presented at the meeting. },
  pubstate  = {published},
  tppubtype = {article}
}
2018
Desiro, Daniel; Hölzer, Martin; Ibrahim, Bashar; Marz, Manja
SilentMutations (SIM): a tool for analyzing long-range RNA-RNA interactions in viral genomes and structured RNAs Journal Article
In: Virus Res, vol. 260, pp. 135–141, 2018.
Abstract | Links | BibTeX | Tags: RNA structure, RNA-RNA interactions, software, viruses
@article{Desiro:18,
title = {SilentMutations (SIM): a tool for analyzing long-range RNA-RNA interactions in viral genomes and structured RNAs},
author = {Daniel Desiro and Martin Hölzer and Bashar Ibrahim and Manja Marz},
url = {https://github.com/desiro/silentMutations},
doi = {10.1016/j.virusres.2018.11.005},
year = {2018},
date = {2018-11-12},
urldate = {2018-11-12},
journal = {Virus Res},
volume = {260},
pages = {135--141},
abstract = {A single nucleotide change in the coding region can alter the amino acid sequence of a protein. In consequence, natural or artificial sequence changes in viral RNAs may have various effects not only on protein stability, function and structure but also on viral replication. In recent decades, several tools have been developed to predict the effect of mutations in structured RNAs such as viral genomes or non-coding RNAs. Some tools use multiple point mutations and also take coding regions into account. However, none of these tools was designed to specifically simulate the effect of mutations on viral long-range interactions. Here, we developed SilentMutations (SIM), an easy-to-use tool to analyze the effect of multiple point mutations on the secondary structures of two interacting viral RNAs. The tool can simulate disruptive and compensatory mutants of two interacting single-stranded RNAs. This allows a fast and accurate assessment of key regions potentially involved in functional long-range RNA-RNA interactions and will eventually help virologists and RNA-experts to design appropriate experiments. SIM only requires two interacting single-stranded RNA regions as input. The output is a plain text file containing the most promising mutants and a graphical representation of all interactions. We applied our tool on two experimentally validated influenza A virus and hepatitis C virus interactions and we were able to predict potential double mutants for in vitro validation experiments. The source code and documentation of SIM are freely available at github.com/desiro/silentMutations.},
keywords = {RNA structure, RNA-RNA interactions, software, viruses},
pubstate = {published},
tppubtype = {article}
}
Gerst, Ruman; Hölzer, Martin
PCAGO: An interactive web service to analyze RNA-Seq data with principal component analysis Journal Article
In: bioRxiv, pp. 433078, 2018.
Abstract | Links | BibTeX | Tags: annotation, differential expression analysis, RNA / transcriptomics, software
@article{Gerst:18,
  title     = {PCAGO: An interactive web service to analyze RNA-Seq data with principal component analysis},
  author    = {Ruman Gerst and Martin Hölzer},
  url       = {https://github.com/rnajena/pcago-unified},
  doi       = {10.1101/433078},
  year      = {2018},
  date      = {2018-10-03},
  urldate   = {2018-10-03},
  journal   = {bioRxiv},
  pages     = {433078},
  publisher = {Cold Spring Harbor Laboratory},
  abstract  = {The initial characterization and clustering of biological samples is a critical step in the analysis of any transcriptomics study. In many studies, principal component analysis (PCA) is the clustering algorithm of choice to predict the relationship of samples or cells based solely on differential gene expression. In addition to the pure quality evaluation of the data, a PCA can also provide initial insights into the biological background of an experiment and help researchers to interpret the data and design the subsequent computational steps accordingly. However, to avoid misleading clusterings and interpretations, an appropriate selection of the underlying gene sets to build the PCA and the choice of the most fitting principal components for the visualization are crucial parts. Here, we present PCAGO, an easy-to-use and interactive tool to analyze gene quantification data derived from RNA sequencing experiments with PCA. The tool includes features such as read-count normalization, filtering of read counts by gene annotation, and various visualization options. In addition, PCAGO helps to select appropriate parameters such as the number of genes and principal components to create meaningful visualizations.},
  keywords  = {annotation, differential expression analysis, RNA / transcriptomics, software},
  pubstate  = {published},
  tppubtype = {article},
}
Ibrahim, Bashar; Arkhipova, Ksenia; Andeweg, Arno C.; Posada-Céspedes, Susana; Enault, François; Gruber, Arthur; Koonin, Eugene V.; Kupczok, Anne; Lemey, Philippe; McHardy, Alice C.; McMahon, Dino P.; Pickett, Brett E.; Robertson, David L.; Scheuermann, Richard H.; Zhernakova, Alexandra; Zwart, Mark P.; Schönhuth, Alexander; Dutilh, Bas E.; Marz, Manja
Bioinformatics Meets Virology: The European Virus Bioinformatics Center's Second Annual Meeting Journal Article
In: Viruses, vol. 10, 2018.
Abstract | Links | BibTeX | Tags: conference report, evolution, software, viruses
@article{Ibrahim:18,
  title     = {Bioinformatics Meets Virology: The European Virus Bioinformatics Center's Second Annual Meeting},
  author    = {Bashar Ibrahim and Ksenia Arkhipova and Arno C. Andeweg and Susana Posada-Céspedes and François Enault and Arthur Gruber and Eugene V. Koonin and Anne Kupczok and Philippe Lemey and Alice C. McHardy and Dino P. McMahon and Brett E. Pickett and David L. Robertson and Richard H. Scheuermann and Alexandra Zhernakova and Mark P. Zwart and Alexander Schönhuth and Bas E. Dutilh and Manja Marz},
  doi       = {10.3390/v10050256},
  year      = {2018},
  date      = {2018-05-14},
  urldate   = {2018-05-14},
  journal   = {Viruses},
  volume    = {10},
  number    = {5},
  pages     = {256},
  abstract  = {The Second Annual Meeting of the European Virus Bioinformatics Center (EVBC), held in Utrecht, Netherlands, focused on computational approaches in virology, with topics including (but not limited to) virus discovery, diagnostics, (meta-)genomics, modeling, epidemiology, molecular structure, evolution, and viral ecology. The goals of the Second Annual Meeting were threefold: (i) to bring together virologists and bioinformaticians from across the academic, industrial, professional, and training sectors to share best practice; (ii) to provide a meaningful and interactive scientific environment to promote discussion and collaboration between students, postdoctoral fellows, and both new and established investigators; (iii) to inspire and suggest new research directions and questions. Approximately 120 researchers from around the world attended the Second Annual Meeting of the EVBC this year, including 15 renowned international speakers. This report presents an overview of new developments and novel research findings that emerged during the meeting.},
  keywords  = {conference report, evolution, software, viruses},
  pubstate  = {published},
  tppubtype = {article},
}
Ibrahim, Bashar; McMahon, Dino P; Hufsky, Franziska; Beer, Martin; Deng, Li; Le Mercier, Philippe; Palmarini, Massimo; Thiel, Volker; Marz, Manja
A new era of virus bioinformatics Journal Article
In: Virus Res, vol. 251, pp. 86–90, 2018.
Abstract | Links | BibTeX | Tags: review, software, viruses
@article{Ibrahim:18a,
  title     = {A new era of virus bioinformatics},
  author    = {Bashar Ibrahim and Dino P McMahon and Franziska Hufsky and Martin Beer and Li Deng and Le Mercier, Philippe and Massimo Palmarini and Volker Thiel and Manja Marz},
  doi       = {10.1016/j.virusres.2018.05.009},
  year      = {2018},
  date      = {2018-05-08},
  urldate   = {2018-01-01},
  journal   = {Virus Res},
  volume    = {251},
  pages     = {86--90},
  abstract  = {Despite the recognized excellence of virology and bioinformatics, these two communities have interacted surprisingly sporadically, aside from some pioneering work on HIV-1 and influenza. Bringing together the expertise of bioinformaticians and virologists is crucial, since very specific but fundamental computational approaches are required for virus research, particularly in an era of big data. Collaboration between virologists and bioinformaticians is necessary to improve existing analytical tools, cloud-based systems, computational resources, data sharing approaches, new diagnostic tools, and bioinformatic training. Here, we highlight current progress and discuss potential avenues for future developments in this promising era of virus bioinformatics. We end by presenting an overview of current technologies, and by outlining some of the major challenges and advantages that bioinformatics will bring to the field of virology.},
  keywords  = {review, software, viruses},
  pubstate  = {published},
  tppubtype = {article},
}
Hufsky, Franziska; Ibrahim, Bashar; Beer, Martin; Deng, Li; Le Mercier, Philippe; McMahon, Dino P; Palmarini, Massimo; Thiel, Volker; Marz, Manja
Virologists-Heroes need weapons Journal Article
In: PLoS Pathog, vol. 14, no. 2, pp. e1006771, 2018.
Abstract | Links | BibTeX | Tags: review, software, viruses
@article{Hufsky:18,
  title     = {Virologists-Heroes need weapons},
  author    = {Franziska Hufsky and Bashar Ibrahim and Martin Beer and Li Deng and Le Mercier, Philippe and Dino P McMahon and Massimo Palmarini and Volker Thiel and Manja Marz},
  doi       = {10.1371/journal.ppat.1006771},
  year      = {2018},
  date      = {2018-02-08},
  urldate   = {2018-02-08},
  journal   = {PLoS Pathog},
  volume    = {14},
  number    = {2},
  pages     = {e1006771},
  abstract  = {Virologists. You might know a couple of them, but unless you are a virologist yourself, the probability that you have collaborated with one in the past is low. The community is relatively small, but they pack a heavy punch and are expected to play a leading role in the research into pathogens that lies ahead. You may ask why we think virologists are our future. Suffice it to say that it is not just because they have invented technologies that belong to the space age, including use of viruses as vehicles to shuttle genes into cells[1], organic nanoparticles with specific tools attached to their surfaces to get inside target cells[2], and using genetically modified viruses as therapies to fight against cancer[3]. Did you know that virologists currently only know of about 3,200 viral species but that more than 320,000 mammal-associated viruses[4] are thought to await discovery? Just think about the viruses hidden in the Arctic ice[5] or in the insects and other animals from once cut-off regions in the world, which now face ever-increasing human exposure[6]. But a heroic (as well as an apocalyptic) role for virologists may also be on the horizon, as the adoption of phage therapy may, in the future, be used to control harmful bacteria when antibiotics fail[7].},
  keywords  = {review, software, viruses},
  pubstate  = {published},
  tppubtype = {article},
}
2017
Hölzer, Martin; Marz, Manja
Software Dedicated to Virus Sequence Analysis Journal Article
In: Adv Virus Res, vol. 99, pp. 233–257, 2017.
Abstract | Links | BibTeX | Tags: DNA / genomics, evolution, phylogenetics, review, RNA / transcriptomics, RNA structure, software, viruses
@article{Hoelzer:17,
  title     = {Software Dedicated to Virus Sequence Analysis},
  author    = {Martin Hölzer and Manja Marz},
  doi       = {10.1016/bs.aivir.2017.08.004},
  year      = {2017},
  date      = {2017-09-28},
  urldate   = {2017-09-28},
  journal   = {Adv Virus Res},
  volume    = {99},
  pages     = {233--257},
  abstract  = {Computer-assisted technologies of the genomic structure, biological function, and evolution of viruses remain a largely neglected area of research. The attention of bioinformaticians to this challenging field is currently unsatisfying in respect to its medical and biological importance. The power of new genome sequencing technologies, associated with new tools to handle "big data", provides unprecedented opportunities to address fundamental questions in virology. Here, we present an overview of the current technologies, challenges, and advantages of Next-Generation Sequencing (NGS) in relation to the field of virology. We present how viral sequences can be detected de novo out of current short-read NGS data. Furthermore, we discuss the challenges and applications of viral quasispecies and how secondary structures, commonly shaped by RNA viruses, can be computationally predicted. The phylogenetic analysis of viruses, as another ubiquitous field in virology, forms an essential element of describing viral epidemics and challenges current algorithms. Recently, the first specialized virus-bioinformatic organizations have been established. We need to bring together virologists and bioinformaticians and provide a platform for the implementation of interdisciplinary collaborative projects at local and international scales. Above all, there is an urgent need for dedicated software tools to tackle various challenges in virology.},
  keywords  = {DNA / genomics, evolution, phylogenetics, review, RNA / transcriptomics, RNA structure, software, viruses},
  pubstate  = {published},
  tppubtype = {article},
}
2016
Winter, Sascha; Jahn, Katharina; Wehner, Stefanie; Kuchenbecker, Leon; Marz, Manja; Stoye, Jens; Böcker, Sebastian
Finding approximate gene clusters with Gecko 3 Journal Article
In: Nucleic Acids Res, vol. 44, pp. 9600–9610, 2016.
Abstract | Links | BibTeX | Tags: bacteria, DNA / genomics, evolution, software
@article{Winter:16,
  title     = {Finding approximate gene clusters with Gecko 3},
  author    = {Sascha Winter and Katharina Jahn and Stefanie Wehner and Leon Kuchenbecker and Manja Marz and Jens Stoye and Sebastian Böcker},
  url       = {http://bio.informatik.uni-jena.de/software/gecko3/},
  doi       = {10.1093/nar/gkw843},
  year      = {2016},
  date      = {2016-09-26},
  urldate   = {2016-09-26},
  journal   = {Nucleic Acids Res},
  volume    = {44},
  pages     = {9600--9610},
  abstract  = {Gene-order-based comparison of multiple genomes provides signals for functional analysis of genes and the evolutionary process of genome organization. Gene clusters are regions of co-localized genes on genomes of different species. The rapid increase in sequenced genomes necessitates bioinformatics tools for finding gene clusters in hundreds of genomes. Existing tools are often restricted to few (in many cases, only two) genomes, and often make restrictive assumptions such as short perfect conservation, conserved gene order or monophyletic gene clusters. We present Gecko 3, an open-source software for finding gene clusters in hundreds of bacterial genomes, that comes with an easy-to-use graphical user interface. The underlying gene cluster model is intuitive, can cope with low degrees of conservation as well as misannotations and is complemented by a sound statistical evaluation. To evaluate the biological benefit of Gecko 3 and to exemplify our method, we search for gene clusters in a dataset of 678 bacterial genomes using Synechocystis sp. PCC 6803 as a reference. We confirm detected gene clusters reviewing the literature and comparing them to a database of operons; we detect two novel clusters, which were confirmed by publicly available experimental RNA-Seq data. The computational analysis is carried out on a laptop computer in <40 min.},
  keywords  = {bacteria, DNA / genomics, evolution, software},
  pubstate  = {published},
  tppubtype = {article},
}
Fricke, Markus; Marz, Manja
Prediction of conserved long-range RNA-RNA interactions in full viral genomes Journal Article
In: Bioinformatics, vol. 32, no. 19, pp. 2928–2935, 2016.
Abstract | Links | BibTeX | Tags: alignment, RNA / transcriptomics, RNA structure, RNA-RNA interactions, software, viruses
@article{Fricke:16,
  title     = {Prediction of conserved long-range RNA-RNA interactions in full viral genomes},
  author    = {Markus Fricke and Manja Marz},
  url       = {http://www.rna.uni-jena.de/en/supplements/lriscan/},
  doi       = {10.1093/bioinformatics/btw323},
  year      = {2016},
  date      = {2016-06-10},
  urldate   = {2016-06-10},
  journal   = {Bioinformatics},
  volume    = {32},
  number    = {19},
  pages     = {2928--2935},
  abstract  = {Long-range RNA-RNA interactions (LRIs) play an important role in viral replication, however, only a few of these interactions are known and only for a small number of viral species. Up to now, it has been impossible to screen a full viral genome for LRIs experimentally or in silico. Most known LRIs are cross-reacting structures (pseudoknots) undetectable by most bioinformatical tools. We present LRIscan, a tool for the LRI prediction in full viral genomes based on a multiple genome alignment. We confirmed 14 out of 16 experimentally known and evolutionary conserved LRIs in genome alignments of HCV, Tombusviruses, Flaviviruses and HIV-1. We provide several promising new interactions, which include compensatory mutations and are highly conserved in all considered viral sequences. Furthermore, we provide reactivity plots highlighting the hot spots of predicted LRIs. Source code and binaries of LRIscan freely available for download at http://www.rna.uni-jena.de/en/supplements/lriscan/, implemented in Ruby/C++ and supported on Linux and Windows. Contact: manja@uni-jena.de. Supplementary data are available at Bioinformatics online.},
  keywords  = {alignment, RNA / transcriptomics, RNA structure, RNA-RNA interactions, software, viruses},
  pubstate  = {published},
  tppubtype = {article},
}
2014
Qin, Jing; Fricke, Markus; Marz, Manja; Stadler, Peter F; Backofen, Rolf
Graph-distance distribution of the Boltzmann ensemble of RNA secondary structures Journal Article
In: Algorithms Mol Biol, vol. 9, pp. 19, 2014.
Abstract | Links | BibTeX | Tags: RNA / transcriptomics, RNA structure, software, splicing, viruses
@article{Qin:14,
  title     = {Graph-distance distribution of the Boltzmann ensemble of RNA secondary structures},
  author    = {Jing Qin and Markus Fricke and Manja Marz and Peter F Stadler and Rolf Backofen},
  url       = {http://www.rna.uni-jena.de/RNAgraphdist.html},
  doi       = {10.1186/1748-7188-9-19},
  year      = {2014},
  date      = {2014-09-11},
  urldate   = {2014-09-11},
  journal   = {Algorithms Mol Biol},
  volume    = {9},
  pages     = {19},
  abstract  = {Large RNA molecules are often composed of multiple functional domains whose spatial arrangement strongly influences their function. Pre-mRNA splicing, for instance, relies on the spatial proximity of the splice junctions that can be separated by very long introns. Similar effects appear in the processing of RNA virus genomes. Albeit a crude measure, the distribution of spatial distances in thermodynamic equilibrium harbors useful information on the shape of the molecule that in turn can give insights into the interplay of its functional domains. Spatial distance can be approximated by the graph-distance in RNA secondary structure. We show here that the equilibrium distribution of graph-distances between a fixed pair of nucleotides can be computed in polynomial time by means of dynamic programming. While a naïve implementation would yield recursions with a very high time complexity of O(n^6 D^5) for sequence length n and D distinct distance values, it is possible to reduce this to O(n^4) for practical applications in which predominantly small distances are of interest. Further reductions, however, seem to be difficult. Therefore, we introduced sampling approaches that are much easier to implement. They are also theoretically favorable for several real-life applications, in particular since these primarily concern long-range interactions in very large RNA molecules. The graph-distance distribution can be computed using a dynamic programming approach. Although a crude approximation of reality, our initial results indicate that the graph-distance can be related to the smFRET data. The additional file and the software of our paper are available from http://www.rna.uni-jena.de/RNAgraphdist.html.},
  keywords  = {RNA / transcriptomics, RNA structure, software, splicing, viruses},
  pubstate  = {published},
  tppubtype = {article},
}
2013
Backofen, Rolf; Fricke, Markus; Marz, Manja; Qin, Jing; Stadler, Peter F
Distribution of graph-distances in Boltzmann ensembles of RNA secondary structures Proceedings Article
In: International Workshop on Algorithms in Bioinformatics, pp. 112–125, 2013, (published in Algorithms Mol Biol: https://dx.doi.org/10.1186/1748-7188-9-19).
BibTeX | Tags: RNA / transcriptomics, RNA structure, software
@inproceedings{Backofen:13,
  title     = {Distribution of graph-distances in Boltzmann ensembles of RNA secondary structures},
  author    = {Rolf Backofen and Markus Fricke and Manja Marz and Jing Qin and Peter F Stadler},
  year      = {2013},
  date      = {2013-01-01},
  urldate   = {2013-01-01},
  booktitle = {International Workshop on Algorithms in Bioinformatics},
  pages     = {112--125},
  note      = {published in Algorithms Mol Biol: https://dx.doi.org/10.1186/1748-7188-9-19},
  keywords  = {RNA / transcriptomics, RNA structure, software},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Wieseke, Nicolas; Lechner, Marcus; Ludwig, Marcus; Marz, Manja
POMAGO: Multiple Genome-Wide Alignment Tool for Bacteria Proceedings Article
In: Cai, Zhipeng; Eulenstein, Oliver; Janies, Daniel; Schwartz, Daniel (Ed.): Proceedings of the 9th International Symposium on Bioinformatics Research and Applications (ISBRA 2013), Charlotte, NC, USA, May 20-22, 2013, pp. 249–260, Springer, 2013.
Abstract | Links | BibTeX | Tags: alignment, bacteria, phylogenetics, RNA structure, software
@inproceedings{Wieseke:13,
  title     = {POMAGO: Multiple Genome-Wide Alignment Tool for Bacteria},
  author    = {Nicolas Wieseke and Marcus Lechner and Marcus Ludwig and Manja Marz},
  editor    = {Zhipeng Cai and Oliver Eulenstein and Daniel Janies and Daniel Schwartz},
  url       = {http://www.rna.uni-jena.de/supplements/pomago},
  doi       = {10.1007/978-3-642-38036-5_25},
  year      = {2013},
  date      = {2013-01-01},
  urldate   = {2013-01-01},
  booktitle = {Proceedings of the 9th International Symposium on Bioinformatics Research and Applications (ISBRA 2013), Charlotte, NC, USA, May 20-22, 2013},
  volume    = {7875},
  number    = {1},
  pages     = {249--260},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  abstract  = {Multiple Genome-wide Alignments are a first crucial step to compare genomes. Gain and loss of genes, duplications and genomic rearrangements are challenging problems that aggravate with increasing phylogenetic distances. We describe a multiple genome-wide alignment tool for bacteria, called POMAGO, which is based on orthologous genes and their syntenic information determined by Proteinortho. This strategy enables POMAGO to efficiently define anchor points even across wide phylogenetic distances and outperform existing approaches in this field of application. The given set of orthologous genes is enhanced by several cleaning and completion steps, including the addition of previously undetected orthologous genes. Protein-coding genes are aligned on nucleotide and protein level, whereas intergenic regions are aligned on nucleotide level only. We tested and compared our program at three very different sets of bacteria that exhibit different degrees of phylogenetic distances: 1) 15 closely related, well examined and described E. coli species, 2) six more divergent Aquificales, as putative basal bacteria, and 3) a set of eight extreme divergent species, distributed among the whole phylogenetic tree of bacteria. POMAGO is written in a modular way which allows extending or even exchanging algorithms in different stages of the alignment process. Intergenic regions might for instance be aligned using an RNA secondary structure aware algorithm rather than to rely on sequence data alone. The software is freely available from http://www.rna.uni-jena.de/supplements/pomago.},
  keywords  = {alignment, bacteria, phylogenetics, RNA structure, software},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
2011
Lechner, Marcus; Findeiss, Sven; Steiner, Lydia; Marz, Manja; Stadler, Peter F; Prohaska, Sonja J
Proteinortho: detection of (co-)orthologs in large-scale analysis Journal Article
In: BMC Bioinf, vol. 12, pp. 124, 2011.
Abstract | Links | BibTeX | Tags: alignment, bacteria, phylogenetics, proteins, software
@article{Lechner:11,
  title     = {Proteinortho: detection of (co-)orthologs in large-scale analysis},
  author    = {Marcus Lechner and Sven Findeiss and Lydia Steiner and Manja Marz and Peter F Stadler and Sonja J Prohaska},
  url       = {http://bioinf.pharmazie.uni-marburg.de/supplements/proteinortho/},
  doi       = {10.1186/1471-2105-12-124},
  year      = {2011},
  date      = {2011-04-28},
  urldate   = {2011-04-28},
  journal   = {BMC Bioinf},
  volume    = {12},
  pages     = {124},
  abstract  = {Orthology analysis is an important part of data analysis in many areas of bioinformatics such as comparative genomics and molecular phylogenetics. The ever-increasing flood of sequence data, and hence the rapidly increasing number of genomes that can be compared simultaneously, calls for efficient software tools as brute-force approaches with quadratic memory requirements become infeasible in practise. The rapid pace at which new data become available, furthermore, makes it desirable to compute genome-wide orthology relations for a given dataset rather than relying on relations listed in databases. The program Proteinortho described here is a stand-alone tool that is geared towards large datasets and makes use of distributed computing techniques when run on multi-core hardware. It implements an extended version of the reciprocal best alignment heuristic. We apply Proteinortho to compute orthologous proteins in the complete set of all 717 eubacterial genomes available at NCBI at the beginning of 2009. We identified thirty proteins present in 99% of all bacterial proteomes. Proteinortho significantly reduces the required amount of memory for orthology analysis compared to existing tools, allowing such computations to be performed on off-the-shelf hardware.},
  keywords  = {alignment, bacteria, phylogenetics, proteins, software},
  pubstate  = {published},
  tppubtype = {article},
}
2010
Yusuf, Dilmurat; Marz, Manja; Stadler, Peter F; Hofacker, Ivo L
Bcheck: a wrapper tool for detecting RNase P RNA genes Journal Article
In: BMC Genomics, vol. 11, pp. 432, 2010.
Abstract | Links | BibTeX | Tags: annotation, bacteria, classification, fungi, ncRNAs, RNA / transcriptomics, software
@article{Yusuf:10,
  title     = {Bcheck: a wrapper tool for detecting RNase P RNA genes},
  author    = {Dilmurat Yusuf and Manja Marz and Peter F Stadler and Ivo L Hofacker},
  url       = {http://rna.tbi.univie.ac.at/bcheck},
  doi       = {10.1186/1471-2164-11-432},
  year      = {2010},
  date      = {2010-07-13},
  urldate   = {2010-07-13},
  journal   = {BMC Genomics},
  volume    = {11},
  pages     = {432},
  abstract  = {Effective bioinformatics solutions are needed to tackle challenges posed by industrial-scale genome annotation. We present Bcheck, a wrapper tool which predicts RNase P RNA genes by combining the speed of pattern matching and sensitivity of covariance models. The core of Bcheck is a library of subfamily specific descriptor models and covariance models. Scanning all microbial genomes in GenBank identifies RNase P RNA genes in 98% of 1024 microbial chromosomal sequences within just 4 hours on single CPU. Comparing to existing annotations found in 387 of the GenBank files, Bcheck predictions have more intact structure and are automatically classified by subfamily membership. For eukaryotic chromosomes Bcheck could identify the known RNase P RNA genes in 84 out of 85 metazoan genomes and 19 out of 21 fungi genomes. Bcheck predicted 37 novel eukaryotic RNase P RNA genes, 32 of which are from fungi. Gene duplication events are observed in at least 20 metazoan organisms. Scanning of meta-genomic data from the Global Ocean Sampling Expedition, comprising over 10 million sample sequences (18 Gigabases), predicted 2909 unique genes, 98% of which fall into ancestral bacteria A type of RNase P RNA and 66% of which have no close homolog to known prokaryotic RNase P RNA. The combination of efficient filtering by means of a descriptor-based search and subsequent construction of a high-quality gene model by means of a covariance model provides an efficient method for the detection of RNase P RNA genes in large-scale sequencing data. Bcheck is implemented as webserver and can also be downloaded for local use from http://rna.tbi.univie.ac.at/bcheck.},
  keywords  = {annotation, bacteria, classification, fungi, ncRNAs, RNA / transcriptomics, software},
  pubstate  = {published},
  tppubtype = {article},
}
