@comment{DeepMosaic journal article. The ISSN 1087-0156 and the DOI prefix 10.1038/s41587 identify the venue as Nature Biotechnology. TODO: volume, number and pages are not recorded in this entry; add them from the publisher record.}
@article{394e0a2f705c42af886502da481bb17d,
title = "Control-independent mosaic single nucleotide variant detection with {DeepMosaic}",
abstract = "Mosaic variants (MVs) reflect mutagenic processes during embryonic development and environmental exposure, accumulate with aging and underlie diseases such as cancer and autism. The detection of noncancer MVs has been computationally challenging due to the sparse representation of nonclonally expanded MVs. Here we present DeepMosaic, combining an image-based visualization module for single nucleotide MVs and a convolutional neural network-based classification module for control-independent MV detection. DeepMosaic was trained on 180,000 simulated or experimentally assessed MVs, and was benchmarked on 619,740 simulated MVs and 530 independent biologically tested MVs from 16 genomes and 181 exomes. DeepMosaic achieved higher accuracy compared with existing methods on biological data, with a sensitivity of 0.78, specificity of 0.83 and positive predictive value of 0.96 on noncancer whole-genome sequencing data, as well as doubling the validation rate over previous best-practice methods on noncancer whole-exome sequencing data (0.43 versus 0.18). DeepMosaic represents an accurate MV classifier for noncancer samples that can be implemented as an alternative or complement to existing methods.",
author = "{NIMH Brain Somatic Mosaicism Network} and Xiaoxu Yang and Xin Xu and Breuss, {Martin W.} and Danny Antaki and Ball, {Laurel L.} and Changuk Chung and Jiawei Shen and Chen Li and George, {Renee D.} and Yifan Wang and Taejeong Bae and Yuhe Cheng and Alexej Abyzov and Liping Wei and Alexandrov, {Ludmil B.} and Sebat, {Jonathan L.} and Dan Averbuj and Subhojit Roy and Eric Courchesne and Huang, {August Y.} and Alissa D{\textquoteright}Gama and Caroline Dias and Walsh, {Christopher A.} and Javier Ganz and Michael Lodato and Michael Miller and Pengpeng Li and Rachel Rodin and Robert Hill and Sara Bizzotto and Sattar Khoshkhoo and Zinan Zhou and Alice Lee and Alison Barton and Alon Galor and Chong Chu and Craig Bohrson and Doga Gulhan and Eduardo Maury and Elaine Lim and Euncheon Lim and Giorgio Melloni and Isidro Cortes and Jake Lee and Joe Luquette and Lixing Yang and Maxwell Sherman and Michael Coulter and Minseok Kwon and Park, {Peter J.}",
note = "Funding Information: We thank Y. Dou for helping to set up the MosaicForecast pipeline. We thank M. K. Gilson for the help with computational resources. We thank P. J. Park, G. W. Cottrell, J. V. Moran, M. Gymrek, P. J. Reed, A. Y. Huang, S.-J. Cheng and Y. Chen for their valuable comments, help and suggestions. This work was supported by the National Institute of Mental Health (NIMH) (grant nos. U01MH108898 and R01MH124890 to J.G.G.), Rady Children{\textquoteright}s Institute for Genomic Medicine and the Howard Hughes Medical Institute. We thank San Diego Supercomputer Center (grant no. TG-IBN190021 to X.Y. and J.G.G.) for computational help. This publication includes data generated at the UC San Diego IGM Genomics Center using an Illumina NovaSeq 6000 platform that was purchased with funding from a National Institutes of Health SIG grant (no. S10OD026929 X.Y. and J.G.G.). Publisher Copyright: {\textcopyright} 2023, The Author(s), under exclusive licence to Springer Nature America, Inc.",
year = "2023",
doi = "10.1038/s41587-022-01559-w",
language = "English (US)",
journal = "Nature Biotechnology",
issn = "1087-0156",
publisher = "Nature Publishing Group",
}