% HStream conference paper (ICPP '24 proceedings, Association for Computing Machinery).
% The citation key is identical to the DOI stored in the doi field; keep it
% unchanged so that existing cite commands continue to resolve.
% - title: the system name HStream is brace-protected so that styles which
%   apply sentence casing do not lowercase its inner capital.
% - pages: the range is written with an ASCII double hyphen, not a Unicode dash.
% - NOTE(review): address holds the publisher city and location holds the
%   conference venue. Under biblatex, location is the preferred name for
%   address, so the two may collide there -- confirm against the target style.
@inproceedings{10.1145/3673038.3673150,
  author    = {Cernuda, Jaime and Ye, Jie and Kougkas, Anthony and Sun, Xian-He},
  title     = {{HStream}: A hierarchical data streaming engine for high-throughput scientific applications},
  booktitle = {Proceedings of the 53rd International Conference on Parallel Processing},
  series    = {ICPP '24},
  location  = {Gotland, Sweden},
  year      = {2024},
  pages     = {231--240},
  numpages  = {10},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9798400717932},
  doi       = {10.1145/3673038.3673150},
  url       = {https://doi.org/10.1145/3673038.3673150},
  keywords  = {Data Streaming, HPC, elastic system, hierarchical storage, in-transit},
  abstract  = {Data streaming is gaining traction in high-performance computing (HPC) as a mechanism for continuous data transfer, but remains underutilized as a processing paradigm due to the inadequacy of existing technologies, which are primarily designed for cloud architectures and ill-equipped to tackle HPC-specific challenges. This work introduces HStream, a novel data management design for out-of-core data streaming engines. Central to the HStream design is the separation of data and computing planes at the task level. By managing them independently, issues such as memory thrashing and back-pressure, caused by the high volume, velocity, and burstiness of I/O in HPC environments, can be effectively addressed at runtime. Specifically, HStream utilizes adaptive parallelism and hierarchical memory management, enabled by this design paradigm, to alleviate memory pressure and enhance system performance. These improvements enable HStream to match the performance of state-of-the-art HPC streaming engines and achieve up to a 1.5x reduction in latency under high data loads.},
}