% Workshop paper published in an LNCS proceedings volume (pages 148--161).
% Author and editor names are stored in "Last, First" form so that BibTeX
% splits given names and surnames unambiguously.
% NOTE(review): no LNCS volume number is recorded for this entry -- add a
% `volume` field once it has been confirmed against the publisher record.
@inproceedings{b30305b19a7145ccadbe80f3bb66efdb,
  author    = {Wang, Chen and Moore, Raymond M. and Evans, Jared M. and Hou, Xiaonan and Weroha, S. John and Jiang, Guoqian},
  title     = {Building a Research-Quality Copy Number Variation Data Repository for Translational Research},
  booktitle = {Heterogeneous Data Management, Polystores, and Analytics for Healthcare - {VLDB} 2018 Workshops, Poly and {DMAH}, 2018, Revised Selected Papers},
  editor    = {Mattson, Timothy and Wang, Fusheng and Teodoro, George and Stonebraker, Michael and Gadepally, Vijay and Luo, Gang},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Verlag},
  year      = {2019},
  pages     = {148--161},
  doi       = {10.1007/978-3-030-14177-6_12},
  isbn      = {9783030141769},
  language  = {English (US)},
  keywords  = {Copy number variation, Integrated data repository, Quality assurance, Standardization},
  abstract  = {Copy number variation (CNV) has known associations with population diversities and disease conditions. However, research communities face great challenges in reusing the CNV data due to the heterogeneity of existing CNV data sources. The objective of the study is to design, develop and evaluate a scalable CNV data repository based on a proposed common data schema for facilitating research-quality CNV data integration and reuse. We created a proposal for a CNV common data schema through analyzing multiple existing CNV data sources. We designed a collection of the CNV quality metrics and demonstrated its usefulness using the CNV data from a study of ovarian cancer xenograft models. We implemented a CNV data repository using a MongoDB database backend and established the CNV genomic data services that enable reusing of the curated CNV data and answering CNV-relevant research questions. The critical issues and future plan for the system enhancement and community engagement were discussed.},
  note      = {Funding Information: Acknowledgements. The study is supported in part by a NIH BD2KOnFHIR U01 project (U01 HG009450), a NCI U01 Project -- caCDE-QA (U01 CA180940), the Mayo Clinic Specialized Program in Research Excellence (SPORE) grant P50 CA136393, R01 CA184502 from the National Institutes of Health, Minnesota Ovarian Cancer Alliance, and Ovarian Cancer Research Fund Alliance. Publisher Copyright: {\textcopyright} 2019, Springer Nature Switzerland AG.; International Workshop on Polystores and other Systems for Heterogeneous Data, Poly 2018 and 4th International Workshop on Data Management and Analytics for Medicine and Health Care, DMAH 2018 held in conjunction with 44th International Conference on Very Large Data Bases, VLDB 2018 ; Conference date: 27-08-2018 Through 31-08-2018},
}