Conference paper Open Access
Poon, Lex;
Farshidi, Siamak;
Li, Na;
Zhao, Zhiming
{ "files": [ { "links": { "self": "https://zenodo.org/api/files/c1f00f80-f8b9-4bb7-9381-2d436ef57cde/2021.workshop.bigdata.midp21.camera.pdf" }, "checksum": "md5:51cca856e3286b38b7b738f7d43e9a86", "bucket": "c1f00f80-f8b9-4bb7-9381-2d436ef57cde", "key": "2021.workshop.bigdata.midp21.camera.pdf", "type": "pdf", "size": 2899991 } ], "owners": [ 22243 ], "doi": "10.1109/BigData52589.2021.9671672", "stats": { "version_unique_downloads": 45.0, "unique_views": 28.0, "views": 35.0, "version_views": 35.0, "unique_downloads": 45.0, "version_unique_views": 28.0, "volume": 133399586.0, "version_downloads": 46.0, "downloads": 46.0, "version_volume": 133399586.0 }, "links": { "doi": "https://doi.org/10.1109/BigData52589.2021.9671672", "latest_html": "https://zenodo.org/record/5872438", "bucket": "https://zenodo.org/api/files/c1f00f80-f8b9-4bb7-9381-2d436ef57cde", "badge": "https://zenodo.org/badge/doi/10.1109/BigData52589.2021.9671672.svg", "html": "https://zenodo.org/record/5872438", "latest": "https://zenodo.org/api/records/5872438" }, "created": "2022-01-18T12:44:13.333953+00:00", "updated": "2022-01-19T01:49:10.168183+00:00", "conceptrecid": "5872437", "revision": 2, "id": 5872438, "metadata": { "access_right_category": "success", "doi": "10.1109/BigData52589.2021.9671672", "version": "camera ready", "license": { "id": "CC-BY-4.0" }, "title": "Unsupervised Anomaly Detection in Data Quality Control", "relations": { "version": [ { "count": 1, "index": 0, "parent": { "pid_type": "recid", "pid_value": "5872437" }, "is_last": true, "last_child": { "pid_type": "recid", "pid_value": "5872438" } } ] }, "grants": [ { "code": "860627", "links": { "self": "https://zenodo.org/api/grants/10.13039/501100000780::860627" }, "title": "CLoud ARtificial Intelligence For pathologY", "acronym": "CLARIFY", "program": "Horizon 2020 Framework Programme - European Training Networks", "funder": { "doi": "10.13039/501100000780", "acronyms": [], "name": "European Commission", "links": { "self": "https://zenodo.org/api/funders/10.13039/501100000780" } } }, { "code": "862409", "links": { "self": "https://zenodo.org/api/grants/10.13039/501100000780::862409" }, "title": "Blue-Cloud: Piloting innovative services for Marine Research & the Blue Economy", "acronym": "Blue Cloud", "program": "H2020", "funder": { "doi": "10.13039/501100000780", "acronyms": [], "name": "European Commission", "links": { "self": "https://zenodo.org/api/funders/10.13039/501100000780" } } }, { "code": "825134", "links": { "self": "https://zenodo.org/api/grants/10.13039/501100000780::825134" }, "title": "smART socIal media eCOsytstem in a blockchaiN Federated environment", "acronym": "ARTICONF", "program": "H2020", "funder": { "doi": "10.13039/501100000780", "acronyms": [], "name": "European Commission", "links": { "self": "https://zenodo.org/api/funders/10.13039/501100000780" } } }, { "code": "824068", "links": { "self": "https://zenodo.org/api/grants/10.13039/501100000780::824068" }, "title": "ENVironmental Research Infrastructures building Fair services Accessible for society, Innovation and Research", "acronym": "ENVRI-FAIR", "program": "H2020", "funder": { "doi": "10.13039/501100000780", "acronyms": [], "name": "European Commission", "links": { "self": "https://zenodo.org/api/funders/10.13039/501100000780" } } } ], "keywords": [ "data quality", "unsupervised learning", "data quality control", "data quality assessment", "anomaly detection,", "automated data quality control" ], "publication_date": "2021-12-15", "creators": [ { "affiliation": "University of Amsterdam", "name": "Poon, Lex" }, { "affiliation": "University of Amsterdam", "name": "Farshidi, Siamak" }, { "affiliation": "University of Amsterdam", "name": "Li, Na" }, { "orcid": "0000-0002-6717-9418", "affiliation": "University of Amsterdam", "name": "Zhao, Zhiming" } ], "meeting": { "acronym": "MIDP-2021", "url": "http://www.midp-info.org/", "dates": "15-18 Dec 2021", "place": "Virtual", "title": "7th International Workshop on Methods to Improve Big Data Science Projects (MIDP-2021), in IEEE BigData 2021" }, "access_right": "open", "resource_type": { "subtype": "conferencepaper", "type": "publication", "title": "Conference paper" }, "description": "<p>Data is one of the most valuable assets of an</p>\n\n<p>organization and has a tremendous impact on its long-term</p>\n\n<p>success and decision-making processes. Typically, organizational</p>\n\n<p>data error and outlier detection processes perform manually and</p>\n\n<p>reactively, making them time-consuming and prone to human errors.</p>\n\n<p>Additionally, rich data types, unlabeled data, and increased</p>\n\n<p>volume have made such data more complex. Accordingly, an</p>\n\n<p>automated anomaly detection approach is required to improve</p>\n\n<p>data management and quality control processes. This study</p>\n\n<p>introduces an unsupervised anomaly detection approach based</p>\n\n<p>on models comparison, consensus learning, and a combination of</p>\n\n<p>rules of thumb with iterative hyper-parameter tuning to increase</p>\n\n<p>data quality. Furthermore, a domain expert is considered a</p>\n\n<p>human in the loop to evaluate and check the data quality and to</p>\n\n<p>judge the output of the unsupervised model. An experiment has</p>\n\n<p>been conducted to assess the proposed approach in the context of</p>\n\n<p>a case study. The experiment results confirm that the proposed</p>\n\n<p>approach can improve the quality of</p>" } }
Views | 35 |
Downloads | 46 |
Data volume | 133.4 MB |
Unique views | 28 |
Unique downloads | 45 |