% MegaMmap conference paper (series SC '24). The coined name in the title is
% brace-protected so that sentence-casing styles keep its capitalisation.
@inproceedings{10.1109/SC41406.2024.00114,
  author    = {Logan, Luke and Kougkas, Anthony and Sun, Xian-He},
  title     = {{MegaMmap}: Blurring the Boundary Between Memory and Storage for Data-Intensive Workloads},
  booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage, and Analysis},
  series    = {SC '24},
  year      = {2024},
  location  = {Atlanta, GA, USA},
  publisher = {IEEE Press},
  isbn      = {9798350352917},
  articleno = {108},
  numpages  = {18},
  doi       = {10.1109/SC41406.2024.00114},
  url       = {https://doi.org/10.1109/SC41406.2024.00114},
  keywords  = {HPC, Memory Tiering, Storage Tiering, Systems Software},
  abstract  = {Large-scale data analytics, scientific simulation, and deep learning codes in HPC perform massive computations on data greatly exceeding the bounds of main memory. These out-of-core algorithms suffer from severe data movement penalties, programming complexity, and limited code reuse. To solve this, HPC sites have steadily increased DRAM capacity. However, this is not sustainable due to financial and environmental costs. A more elegant, low-cost, and portable solution is to expand memory to distributed multi-tiered storage. In this work, we propose MegaMmap: a software distributed shared memory (DSM) that enlarges effective memory capacity through intelligent tiered DRAM and storage management. MegaMmap provides workload-aware data organization, eviction, and prefetching policies to reduce DRAM consumption while ensuring speedy access to critical data. A variety of memory coherence optimizations are provided through an intuitive hinting system. Evaluations show that various workloads can be executed with a fraction of the DRAM while offering competitive performance.},
}