The current state of the art for provenance in data stream management systems (DSMS) is to provide provenance at a high level of abstraction (such as the sensors in a sensor network from which an aggregated value is derived). This limitation was imposed by high-throughput requirements and an anticipated lack of application demand for more detailed provenance information. In this work, we first demonstrate by means of well-chosen use cases that this is a misconception, i.e., coarse-grained provenance is in fact insufficient for many application domains. We then analyze the requirements and challenges involved in integrating support for fine-grained provenance into a streaming system and outline a scalable solution for supporting tuple-level provenance in DSMS.
@comment{GE11: workshop paper (isworkshop = true) listed under the Ariadne project. Under classic BibTeX the crossref parent DBLP:conf/btw/2011w must appear after this entry.}
@inproceedings{GE11,
  author        = {Glavic, Boris and Esmaili, Kyumars Sheykh and Fischer, Peter M. and Tatbul, Nesime},
  bibsource     = {DBLP, http://dblp.uni-trier.de},
  booktitle     = {Proceedings of the 1st Workshop on Data Streams and Event Processing collocated with BTW},
  crossref      = {DBLP:conf/btw/2011w},
  date-added    = {2012-12-14 18:55:49 +0000},
  date-modified = {2012-12-18 17:16:17 +0000},
  isworkshop    = {true},
  keywords      = {Ariadne; Provenance},
  pages         = {58--61},
  pdfurl        = {http://cs.iit.edu/%7edbgroup/assets/pdfpubls/GE11.pdf},
  projects      = {Ariadne},
  title         = {{The Case for Fine-Grained Stream Provenance}},
  venueshort    = {DSEP},
  year          = {2011},
  bdsk-url-1    = {http://cs.iit.edu/%7edbgroup/assets/pdfpubls/GE11.pdf}
}