Managing fine-grained provenance is a critical requirement for data stream management systems (DSMS), not only to address complex applications that require diagnostic capabilities and assurance, but also for providing advanced functionality such as revision processing or query debugging. This paper introduces a novel approach that uses operator instrumentation, i.e., modifying the behavior of operators, to generate and propagate fine-grained provenance through several operators of a query network. In addition to applying this technique to compute provenance eagerly during query execution, we also study how to decouple provenance computation from query processing to reduce run-time overhead and avoid unnecessary provenance retrieval. This includes computing a concise superset of the provenance to allow lazily replaying a query network and reconstructing its provenance as well as lazy retrieval to avoid unnecessary reconstruction of provenance. We develop stream-specific compression methods to reduce the computational and storage overhead of provenance generation and retrieval. Ariadne, our provenance-aware extension of the Borealis DSMS, implements these techniques. Our experiments confirm that Ariadne manages provenance with minor overhead and clearly outperforms query rewrite, the current state-of-the-art.
@inproceedings{GE13,
  author        = {Glavic, Boris and Esmaili, Kyumars Sheykh and Fischer, Peter M. and Tatbul, Nesime},
  title         = {Ariadne: Managing Fine-Grained Provenance on Data Streams},
  booktitle     = {Proceedings of the 7th ACM International Conference on Distributed Event-Based Systems},
  pages         = {291--320},
  year          = {2013},
  keywords      = {Ariadne; Provenance},
  venueshort    = {DEBS},
  projects      = {Ariadne},
  pdfurl        = {http://cs.iit.edu/%7edbgroup/assets/pdfpubls/GE13.pdf},
  slideurl      = {http://www.slideshare.net/lordPretzel/2013-debs-Ariadne},
  date-added    = {2013-05-13 14:03:56 +0000},
  date-modified = {2013-06-02 19:45:50 +0000},
  bdsk-url-1    = {http://cs.iit.edu/%7edbgroup/assets/pdfpubls/GE13.pdf}
}