We present the first query-based approach for explaining missing answers to queries over nested relational data, which is a common data format used by big data systems such as Apache Spark. Our main contributions are a novel way to define query-based why-not provenance based on repairs to queries, and an implementation and preliminary experiments for answering such queries in Spark.
@inproceedings{DG19a,
  author     = {Diestelk{\"a}mper, Ralf and Glavic, Boris and Herschel, Melanie and Lee, Seokki},
  title      = {Query-based Why-not Explanations for Nested Data},
  booktitle  = {Proceedings of the 11th USENIX Workshop on the Theory and Practice of Provenance},
  year       = {2019},
  keywords   = {Provenance; Missing Answers},
  venueshort = {TaPP},
  isworkshop = {true},
  pdfurl     = {http://cs.iit.edu/%7edbgroup/assets/pdfpubls/DG19.pdf},
}