E-Mail: sebastian.krautwurst@uni-jena.de
Room: 08S01
Phone: +49-3641-9-46483
Publications
2023
Meyer, Daria; Goettsch, Winfried; Spangenberg, Jannes; Bohn, Patrick; Stieber, Bettina; Krautwurst, Sebastian; zu Siederdissen, Christian Höner; Srivastava, Akash; Zarkovic, Milena; Wollny, Damian; Marz, Manja
Maximizing the potential of genomic and transcriptomic studies by nanopore sequencing Journal Article
In: bioRxiv, 2023.
@article{nokey,
  author    = {Meyer, Daria and Goettsch, Winfried and Spangenberg, Jannes and Bohn, Patrick and Stieber, Bettina and Krautwurst, Sebastian and {Höner zu Siederdissen}, Christian and Srivastava, Akash and Zarkovic, Milena and Wollny, Damian and Marz, Manja},
  title     = {Maximizing the potential of genomic and transcriptomic studies by nanopore sequencing},
  journal   = {bioRxiv},
  year      = {2023},
  date      = {2023-12-07},
  urldate   = {2023-12-07},
  doi       = {10.1101/2023.12.06.570356},
  abstract  = {Nucleic acid sequencing is the process of identifying the sequence of DNA or RNA, with DNA used for genomes and RNA for transcriptomes. Deciphering this information has the potential to greatly advance our understanding of genomic features and cellular functions. In comparison to other available sequencing methods, nanopore sequencing stands out due to its unique advantages of processing long nucleic acid strands in real time, within a small portable device, enabling the rapid analysis of samples in diverse settings. Evolving over the past decade, nanopore sequencing remains in a state of ongoing development and refinement, resulting in persistent challenges in protocols and technology. This article employs an interdisciplinary approach, evaluating experimental and computational methods to address critical gaps in our understanding in order to maximise the information gain from this advancing technology. We present a robust analysis of all aspects of nanopore sequencing by providing statistically supported insights, thus aiming to provide comprehensive guidelines for the diverse challenges that frequently impede optimal experimental outcomes. Here we present a robust analysis, bridging the gap by providing statistically supported insights into genomic and transcriptomic studies, providing fresh perspectives on sequencing.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Santos, José Diogo Neves Dos; Vitorino, Inês Rosado; Kallscheuer, Nicolai; Srivastava, Akash; Krautwurst, Sebastian; Marz, Manja; Jogler, Christian; Lobo-da-Cunha, Alexandre; Catita, José; Gonçalves, Hugo; González, Ignacio; Reyes, Fernando; Lage, Olga Maria
Streptomyces marispadix sp. nov., isolated from marine beach sediment Journal Article
In: International Journal of Systematic and Evolutionary Microbiology, vol. 73, no. 7, 2023, ISSN: 1466-5034.
@article{nokey_41,
  author    = {Santos, José Diogo Neves Dos and Vitorino, Inês Rosado and Kallscheuer, Nicolai and Srivastava, Akash and Krautwurst, Sebastian and Marz, Manja and Jogler, Christian and Lobo-da-Cunha, Alexandre and Catita, José and Gonçalves, Hugo and González, Ignacio and Reyes, Fernando and Lage, Olga Maria},
  title     = {\textit{Streptomyces marispadix} sp. nov., isolated from marine beach sediment},
  journal   = {International Journal of Systematic and Evolutionary Microbiology},
  volume    = {73},
  number    = {7},
  year      = {2023},
  date      = {2023-07-25},
  urldate   = {2023-07-25},
  doi       = {10.1099/ijsem.0.005956},
  issn      = {1466-5034},
  abstract  = {A novel actinomycetal strain, designated M600PL45_2T, was isolated from marine sediments obtained from Ingleses beach, Porto, on the Northern Coast of Portugal and was subjected to a polyphasic taxonomic characterisation study. The here described Gram-reaction-positive strain is characterised by the production of a brown pigment in both solid and liquid medium and forms typical helical hyphae that differentiate into smooth spores. The results of a phylogenetic analysis based on the 16S rRNA gene sequence indicated that M600PL45_2T has a high similarity to two members of the genus Streptomyces, Streptomyces bathyalis ASO4wetT (98.51 %) and Streptomyces daqingensis NEAU ZJC8T (98.44 %). The genome of M600PL45_2T has a size of 6 695 159 bp, a DNA G+C content of 70.71 mol% and 5538 coding sequences. M600PL45_2T grows at 15–37 °C and with a maximal growth rate between 25 °C and 30 °C. Growth at pH 6.0 to 9.0 with the optimal range between 6.0 and 7.5 was observed. M600PL45_2T showed a high salinity tolerance, growing with 0–10 % (w/v) NaCl, with best growth with 1–3% (w/v) NaCl. Major cellular fatty acids are iso-C15:0 (25.03 %), anteiso-C15:0 (17.70) and iso-C16:0 (26.90 %). The novel isolate was able to grow in media containing a variety of nitrogen and carbon sources. An antimicrobial activity screening indicated that an extract of M600PL45_2T has inhibitory activity against Staphylococcus aureus. On the basis of the polyphasic data, M600PL45_2T (= CECT 30365T = DSM 114036T) is introduced as the type strain of a novel species, that we named Streptomyces marispadix sp. nov.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Erkes, Annett; Grove, René P; Žarković, Milena; Krautwurst, Sebastian; Koebnik, Ralf; Morgan, Richard D; Wilson, Geoffrey G; Hölzer, Martin; Marz, Manja; Boch, Jens; Grau, Jan
Assembling highly repetitive Xanthomonas TALomes using Oxford Nanopore sequencing Journal Article
In: BMC Genomics, vol. 24, iss. 1, pp. 151, 2023.
@article{Erkes2023,
  author    = {Erkes, Annett and Grove, René P and Žarković, Milena and Krautwurst, Sebastian and Koebnik, Ralf and Morgan, Richard D and Wilson, Geoffrey G and Hölzer, Martin and Marz, Manja and Boch, Jens and Grau, Jan},
  title     = {Assembling highly repetitive Xanthomonas TALomes using Oxford Nanopore sequencing},
  journal   = {BMC Genomics},
  volume    = {24},
  number    = {1},
  pages     = {151},
  year      = {2023},
  date      = {2023-03-27},
  doi       = {10.1186/s12864-023-09228-1},
  abstract  = {Background: Most plant-pathogenic Xanthomonas bacteria harbor transcription activator-like effector (TALE) genes, which function as transcriptional activators of host plant genes and support infection. The entire repertoire of up to 29 TALE genes of a Xanthomonas strain is also referred to as TALome. The DNA-binding domain of TALEs is comprised of highly conserved repeats and TALE genes often occur in gene clusters, which precludes the assembly of TALE-carrying Xanthomonas genomes based on standard sequencing approaches.
Results: Here, we report the successful assembly of the 5 Mbp genomes of five Xanthomonas strains from Oxford Nanopore Technologies (ONT) sequencing data. For one of these strains, Xanthomonas oryzae pv. oryzae (Xoo) PXO35, we illustrate why Illumina short reads and longer PacBio reads are insufficient to fully resolve the genome. While ONT reads are perfectly suited to yield highly contiguous genomes, they suffer from a specific error profile within homopolymers. To still yield complete and correct TALomes from ONT assemblies, we present a computational correction pipeline specifically tailored to TALE genes, which yields at least comparable accuracy as Illumina-based polishing. We further systematically assess the ONT-based pipeline for its multiplexing capacity and find that, combined with computational correction, the complete TALome of Xoo PXO35 could have been reconstructed from less than 20,000 ONT reads.
Conclusions: Our results indicate that multiplexed ONT sequencing combined with a computational correction of TALE genes constitutes a highly capable tool for characterizing the TALomes of huge collections of Xanthomonas strains in the future.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Results: Here, we report the successful assembly of the 5 Mbp genomes of five Xanthomonas strains from Oxford Nanopore Technologies (ONT) sequencing data. For one of these strains, Xanthomonas oryzae pv. oryzae (Xoo) PXO35, we illustrate why Illumina short reads and longer PacBio reads are insufficient to fully resolve the genome. While ONT reads are perfectly suited to yield highly contiguous genomes, they suffer from a specific error profile within homopolymers. To still yield complete and correct TALomes from ONT assemblies, we present a computational correction pipeline specifically tailored to TALE genes, which yields at least comparable accuracy as Illumina-based polishing. We further systematically assess the ONT-based pipeline for its multiplexing capacity and find that, combined with computational correction, the complete TALome of Xoo PXO35 could have been reconstructed from less than 20,000 ONT reads.
Conclusions: Our results indicate that multiplexed ONT sequencing combined with a computational correction of TALE genes constitutes a highly capable tool for characterizing the TALomes of huge collections of Xanthomonas strains in the future.
2022
Fuesslin, Valeria; Krautwurst, Sebastian; Srivastava, Akash; Winter, Doris; Liedigk, Britta; Thye, Thorsten; Herrera-León, Silvia; Wohl, Shirlee; May, Jürgen; Fobil, Julius N.; Eibach, Daniel; Marz, Manja; Schuldt, Kathrin
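Prediction of Antibiotic Susceptibility Profiles of Vibrio cholerae Isolates From Whole Genome Illumina and Nanopore Sequencing Data: CholerAegon Journal Article
In: Front Microbiol, vol. 13, pp. 909692, 2022.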
@article{Fuesslin2022,
  author    = {Fuesslin, Valeria and Krautwurst, Sebastian and Srivastava, Akash and Winter, Doris and Liedigk, Britta and Thye, Thorsten and Herrera-León, Silvia and Wohl, Shirlee and May, Jürgen and Fobil, Julius N. and Eibach, Daniel and Marz, Manja and Schuldt, Kathrin},
  title     = {Prediction of Antibiotic Susceptibility Profiles of \textit{Vibrio cholerae} Isolates From Whole Genome Illumina and Nanopore Sequencing Data: CholerAegon},
  journal   = {Front Microbiol},
  volume    = {13},
  pages     = {909692},
  year      = {2022},
  date      = {2022-06-22},
  doi       = {10.3389/fmicb.2022.909692},
  url       = {https://github.com/RaverJay/CholerAegon},
  abstract  = {During the last decades, antimicrobial resistance (AMR) has become a global public health concern. Nowadays multi-drug resistance is commonly observed in strains of Vibrio cholerae, the etiological agent of cholera. In order to limit the spread of pathogenic drug-resistant bacteria and to maintain treatment options the analysis of clinical samples and their AMR profiles are essential. Particularly, in low-resource settings a timely analysis of AMR profiles is often impaired due to lengthy culturing procedures for antibiotic susceptibility testing or lack of laboratory capacity. In this study, we explore the applicability of whole genome sequencing for the prediction of AMR profiles of V. cholerae. We developed the pipeline CholerAegon for the in silico prediction of AMR profiles of 82 V. cholerae genomes assembled from long and short sequencing reads. By correlating the predicted profiles with results from phenotypic antibiotic susceptibility testing we show that the prediction can replace in vitro susceptibility testing for five of seven antibiotics. Because of the relatively low costs, possibility for real-time data analyses, and portability, the Oxford Nanopore Technologies MinION sequencing platform—especially in light of an upcoming less error-prone technology for the platform—appears to be well suited for pathogen genomic analyses such as the one described here. Together with CholerAegon, it can leverage pathogen genomics to improve disease surveillance and to control further spread of antimicrobial resistance.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2021
Brandt, Christian; Krautwurst, Sebastian; Spott, Riccardo; Lohde, Mara; Jundzill, Mateusz; Marquet, Mike; Hölzer, Martin
poreCov - An Easy to Use, Fast, and Robust Workflow for SARS-CoV-2 Genome Reconstruction via Nanopore Sequencing Journal Article
In: Front Genet, vol. 12, pp. 711437, 2021.
@article{Brandt2021,
  author    = {Brandt, Christian and Krautwurst, Sebastian and Spott, Riccardo and Lohde, Mara and Jundzill, Mateusz and Marquet, Mike and Hölzer, Martin},
  title     = {poreCov - An Easy to Use, Fast, and Robust Workflow for SARS-CoV-2 Genome Reconstruction via Nanopore Sequencing},
  journal   = {Front Genet},
  volume    = {12},
  pages     = {711437},
  year      = {2021},
  date      = {2021-07-28},
  urldate   = {2021-07-28},
  doi       = {10.3389/fgene.2021.711437},
  url       = {https://github.com/replikation/poreCov},
  abstract  = {In response to the SARS-CoV-2 pandemic, a highly increased sequencing effort has been established worldwide to track and trace ongoing viral evolution. Technologies, such as nanopore sequencing via the ARTIC protocol are used to reliably generate genomes from raw sequencing data as a crucial base for molecular surveillance. However, for many labs that perform SARS-CoV-2 sequencing, bioinformatics is still a major bottleneck, especially if hundreds of samples need to be processed in a recurring fashion. Pipelines developed for short-read data cannot be applied to nanopore data. Therefore, specific long-read tools and parameter settings need to be orchestrated to enable accurate genotyping and robust reference-based genome reconstruction of SARS-CoV-2 genomes from nanopore data. Here we present poreCov, a highly parallel workflow written in Nextflow, using containers to wrap all the tools necessary for a routine SARS-CoV-2 sequencing lab into one program. The ease of installation, combined with concise summary reports that clearly highlight all relevant information, enables rapid and reliable analysis of hundreds of SARS-CoV-2 raw sequence data sets or genomes. poreCov is freely available on GitHub under the GNUv3 license: github.com/replikation/poreCov.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Krautwurst, Sebastian; Dijkman, Ronald; Thiel, Volker; Krumbholz, Andi; Marz, Manja
Direct RNA Sequencing for Complete Viral Genomes Book Section
In: Frishman, Dmitrij; Marz, Manja (Ed.): Virus Bioinformatics, CRC Press, 2021.
@incollection{Krautwurst:21,
  author    = {Krautwurst, Sebastian and Dijkman, Ronald and Thiel, Volker and Krumbholz, Andi and Marz, Manja},
  title     = {Direct RNA Sequencing for Complete Viral Genomes},
  editor    = {Frishman, Dmitrij and Marz, Manja},
  booktitle = {Virus Bioinformatics},
  publisher = {CRC Press},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  url       = {https://www.taylorfrancis.com/chapters/edit/10.1201/9781003097679-3/direct-rna-sequencing-complete-viral-genomes-sebastian-krautwurst-ronald-dijkman-volker-thiel-andi-krumbholz-manja-marz},
  abstract  = {Determination of nucleotide sequences present in biological samples (termed “sequencing”) has become a key method in almost all fields of bioscience, including virology. Since the advent of high-throughput sequencing (“second-generation sequencing”), it is possible to sequence millions of DNA fragments (“reads”) in parallel at very high accuracy, enabling the inference of single nucleotide polymorphisms (SNPs) between virus strains.
In this chapter, we provide details on how the long-read sequencing technologies (“third-generation sequencing”) which were developed in recent years have expanded the toolkit for researchers beyond the possibilities of short-read sequencing, with a focus on virus sequencing. With increased read lengths, it is possible to sequence full viral transcripts and genomes in single contiguous reads, enabling detailed studies of transcript isoforms, haplotypes, and viral quasispecies. In comparison, long-read technologies have generally higher raw read error rates, but an accurate assembly of transcripts and genomes is facilitated or made unnecessary due to the long contiguous sequences. One of the technologies, namely nanopore sequencing, also uniquely allows for direct RNA sequencing without the need for the creation or amplification of complementary DNA. This enables accurate capture of RNA content in a sample “as is,” e.g., in cells infected by RNA viruses. The protocol also leaves RNA modifications intact, which can be inferred during sequencing. Nanopore sequencing can be implemented at low costs and with constant genome coverage using cDNA amplicon sequencing methods, e.g., for highly parallel screening during virus outbreaks.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {incollection}
}
In this chapter, we provide details on how the long-read sequencing technologies (“third-generation sequencing”) which were developed in recent years have expanded the toolkit for researchers beyond the possibilities of short-read sequencing, with a focus on virus sequencing. With increased read lengths, it is possible to sequence full viral transcripts and genomes in single contiguous reads, enabling detailed studies of transcript isoforms, haplotypes, and viral quasispecies. In comparison, long-read technologies have generally higher raw read error rates, but an accurate assembly of transcripts and genomes is facilitated or made unnecessary due to the long contiguous sequences. One of the technologies, namely nanopore sequencing, also uniquely allows for direct RNA sequencing without the need for the creation or amplification of complementary DNA. This enables accurate capture of RNA content in a sample “as is,” e.g., in cells infected by RNA viruses. The protocol also leaves RNA modifications intact, which can be inferred during sequencing. Nanopore sequencing can be implemented at low costs and with constant genome coverage using cDNA amplicon sequencing methods, e.g., for highly parallel screening during virus outbreaks.
2019
Mostajo, Nelly F.; Lataretu, Marie; Krautwurst, Sebastian; Mock, Florian; Desirò, Daniel; Lamkiewicz, Kevin; Collatz, Maximilian; Schoen, Andreas; Weber, Friedemann; Marz, Manja; Hölzer, Martin
A comprehensive annotation and differential expression analysis of short and long non-coding RNAs in 16 bat genomes Journal Article
In: NAR Genomics Bioinf, vol. 2, no. 1, pp. lqz006, 2019.
@article{Mostajo:20,
  author    = {Mostajo, Nelly F. and Lataretu, Marie and Krautwurst, Sebastian and Mock, Florian and Desirò, Daniel and Lamkiewicz, Kevin and Collatz, Maximilian and Schoen, Andreas and Weber, Friedemann and Marz, Manja and Hölzer, Martin},
  title     = {A comprehensive annotation and differential expression analysis of short and long non-coding RNAs in 16 bat genomes},
  journal   = {NAR Genomics Bioinf},
  volume    = {2},
  number    = {1},
  pages     = {lqz006},
  year      = {2019},
  date      = {2019-09-30},
  urldate   = {2019-09-30},
  doi       = {10.1093/nargab/lqz006},
  url       = {https://www.rna.uni-jena.de/supplements/bats/index.html},
  abstract  = {Although bats are increasingly becoming the focus of scientific studies due to their unique properties, these exceptional animals are still among the least studied mammals. Assembly quality and completeness of bat genomes vary a lot and especially non-coding RNA (ncRNA) annotations are incomplete or simply missing. Accordingly, standard bioinformatics pipelines for gene expression analysis often ignore ncRNAs such as microRNAs or long antisense RNAs. The main cause of this problem is the use of incomplete genome annotations. We present a complete screening for ncRNAs within 16 bat genomes. NcRNAs affect a remarkable variety of vital biological functions, including gene expression regulation, RNA processing, RNA interference and, as recently described, regulatory processes in viral infections. Within all investigated bat assemblies, we annotated 667 ncRNA families including 162 snoRNAs and 193 miRNAs as well as rRNAs, tRNAs, several snRNAs and lncRNAs, and other structural ncRNA elements. We validated our ncRNA candidates by six RNA-Seq data sets and show significant expression patterns that have never been described before in a bat species on such a large scale. Our annotations will be usable as a resource (rna.uni-jena.de/supplements/bats) for deeper studying of bat evolution, ncRNAs repertoire, gene expression and regulation, ecology and important host–virus interactions.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Viehweger, Adrian; Krautwurst, Sebastian; Lamkiewicz, Kevin; Madhugiri, Ramakanth; Ziebuhr, John; Hölzer, Martin; Marz, Manja
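Direct RNA nanopore sequencing of full-length coronavirus genomes provides novel insights into structural variants and enables modification analysis Journal Article
In: Genome Res, vol. 29, pp. 1545-1554, 2019.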
@article{Viehweger:19a,
  author    = {Viehweger, Adrian and Krautwurst, Sebastian and Lamkiewicz, Kevin and Madhugiri, Ramakanth and Ziebuhr, John and Hölzer, Martin and Marz, Manja},
  title     = {Direct RNA nanopore sequencing of full-length coronavirus genomes provides novel insights into structural variants and enables modification analysis},
  journal   = {Genome Res},
  volume    = {29},
  pages     = {1545--1554},
  publisher = {Cold Spring Harbor Laboratory},
  year      = {2019},
  date      = {2019-08-22},
  urldate   = {2019-08-22},
  doi       = {10.1101/gr.247064.118},
  abstract  = {Sequence analyses of RNA virus genomes remain challenging owing to the exceptional genetic plasticity of these viruses. Because of high mutation and recombination rates, genome replication by viral RNA-dependent RNA polymerases leads to populations of closely related viruses, so-called “quasispecies.” Standard (short-read) sequencing technologies are ill-suited to reconstruct large numbers of full-length haplotypes of (1) RNA virus genomes and (2) subgenome-length (sg) RNAs composed of noncontiguous genome regions. Here, we used a full-length, direct RNA sequencing (DRS) approach based on nanopores to characterize viral RNAs produced in cells infected with a human coronavirus. By using DRS, we were able to map the longest (∼26-kb) contiguous read to the viral reference genome. By combining Illumina and Oxford Nanopore sequencing, we reconstructed a highly accurate consensus sequence of the human coronavirus (HCoV)-229E genome (27.3 kb). Furthermore, by using long reads that did not require an assembly step, we were able to identify, in infected cells, diverse and novel HCoV-229E sg RNAs that remain to be characterized. Also, the DRS approach, which circumvents reverse transcription and amplification of RNA, allowed us to detect methylation sites in viral RNAs. Our work paves the way for haplotype-based analyses of viral quasispecies by showing the feasibility of intra-sample haplotype separation. Even though several technical challenges remain to be addressed to exploit the potential of the nanopore technology fully, our work illustrates that DRS may significantly advance genomic studies of complex virus populations, including predictions on long-range interactions in individual full-length viral RNA haplotypes.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Viehweger, Adrian; Krautwurst, Sebastian; Koenig, Brigitte; Marz, Manja
An encoding of genome content for machine learning Journal Article
In: bioRxiv, pp. 524280, 2019.
@article{Viehweger:19,
  author    = {Viehweger, Adrian and Krautwurst, Sebastian and Koenig, Brigitte and Marz, Manja},
  title     = {An encoding of genome content for machine learning},
  journal   = {bioRxiv},
  pages     = {524280},
  publisher = {Cold Spring Harbor Laboratory},
  year      = {2019},
  date      = {2019-01-18},
  urldate   = {2019-01-18},
  doi       = {10.1101/524280},
  url       = {https://github.com/phiweger/nanotext},
  abstract  = {An ever-growing number of metagenomes can be used for biomining and the study of microbial functions. The use of learning algorithms in this context has been hindered, because they often need input in the form of low-dimensional, dense vectors of numbers. We propose such a representation for genomes called nanotext that scales to very large data sets.
The underlying model is learned from a corpus of nearly 150 thousand genomes spanning 750 million protein domains. We treat the protein domains in a genome like words in a document, assuming that protein domains in a similar context have similar “meaning”. This meaning can be distributed by a neural net over a vector of numbers.
The resulting vectors efficiently encode function, preserve known phylogeny, capture subtle functional relationships and are robust against genome incompleteness. The “functional” distance between two vectors complements nucleotide-based distance, so that genomes can be identified as similar even though their nucleotide identity is low. nanotext can thus encode (meta)genomes for direct use in downstream machine learning tasks. We show this by predicting plausible culture media for metagenome assembled genomes (MAGs) from the Tara Oceans Expedition using their genome content only. nanotext is freely released under a BSD licence (https://github.com/phiweger/nanotext).},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
The underlying model is learned from a corpus of nearly 150 thousand genomes spanning 750 million protein domains. We treat the protein domains in a genome like words in a document, assuming that protein domains in a similar context have similar “meaning”. This meaning can be distributed by a neural net over a vector of numbers.
The resulting vectors efficiently encode function, preserve known phylogeny, capture subtle functional relationships and are robust against genome incompleteness. The “functional” distance between two vectors complements nucleotide-based distance, so that genomes can be identified as similar even though their nucleotide identity is low. nanotext can thus encode (meta)genomes for direct use in downstream machine learning tasks. We show this by predicting plausible culture media for metagenome assembled genomes (MAGs) from the Tara Oceans Expedition using their genome content only. nanotext is freely released under a BSD licence (https://github.com/phiweger/nanotext).