% Conference paper in the ICSC 2014 proceedings (pp. 252--253).
% The citation key is the exporter-generated identifier and is kept unchanged
% so that existing citations continue to resolve.
% The former `series` field repeated the `booktitle` text verbatim and has
% been removed; the conference name and dates remain in `note`.
@inproceedings{cb4101088c424a7a994e5113abbc647f,
  author    = {Techentin, Robert and Foti, Daniel and Li, Peter and Daniel, Erik and Gilbert, Barry and Holmes, David and Al-Saffar, Sinan},
  title     = {Development of a semi-synthetic dataset as a testbed for big-data semantic analytics},
  booktitle = {Proceedings - 2014 {IEEE} International Conference on Semantic Computing, {ICSC} 2014},
  year      = {2014},
  pages     = {252--253},
  publisher = {IEEE Computer Society},
  doi       = {10.1109/ICSC.2014.45},
  isbn      = {9781479940028},
  language  = {English (US)},
  keywords  = {RDF, big data, data.gov, graph computing, semantic representation},
  abstract  = {We have developed a large semi-synthetic, semantically rich dataset, modeled after the medical record of a large medical institution. Using the highly diverse data.gov data repository and a multivariate data augmentation strategy, we can generate arbitrarily large semi-synthetic datasets which can be used to test new algorithms and computational platforms. The construction process and basic data characterization are described. The databases, as well as code for data collection, consolidation, and augmentation are available for distribution.},
  note      = {8th IEEE International Conference on Semantic Computing, ICSC 2014; Conference date: 16-06-2014 Through 18-06-2014},
}