% Conference paper in the MEDINFO 2015 proceedings (IOS Press, series
% "Studies in Health Technology and Informatics").
% Names are stored in "von Last, First" form so multi-word surnames are
% unambiguous. The publisher copyright notice lives in the `copyright` field
% rather than `note`, so that it is not printed as part of the reference.
@inproceedings{1aeb01ed720540efa912d210eff8861d,
  author    = {Li, Dingcheng and Rastegar Mojarad, Majid and Li, Yanpeng and Sohn, Sunghwan and Mehrabi, Saeed and Komandur Elayavilli, Ravikumar and Yu, Yue and Liu, Hongfang},
  title     = {A Frequency-based Strategy of Obtaining Sentences from Clinical Data Repository for Crowdsourcing},
  booktitle = {{MEDINFO} 2015},
  editor    = {Georgiou, Andrew and Sarkar, Indra Neil and de Azevedo Marques, Paulo Mazzoncini},
  series    = {Studies in Health Technology and Informatics},
  publisher = {IOS Press},
  year      = {2015},
  pages     = {1033--1034},
  doi       = {10.3233/978-1-61499-564-7-1033},
  language  = {English (US)},
  note      = {15th World Congress on Health and Biomedical Informatics, {MEDINFO} 2015; conference date: 19--23 August 2015},
  copyright = {{\textcopyright} 2015 IMIA and IOS Press},
  keywords  = {bigram filtering, clinical notes, crowdsourcing, de-identification, high-frequent sentences, patient health information},
  abstract  = {In clinical NLP, one major barrier to adopting crowdsourcing for NLP annotation is the issue of confidentiality for protected health information (PHI) in clinical narratives. In this paper, we investigated the use of a frequency-based approach to extract sentences without PHI. Our approach is based on the assumption that sentences appearing frequently tend to contain no PHI. Both manual and automatic evaluations on 500 sentences out of the 7.9 million sentences of frequencies higher than one show that no PHI can be found among them. The promising results provide potentials of releasing those sentences for obtaining sentence-level NLP annotations via crowdsourcing.},
}