% Logan et al., DAOS evaluation paper; ACM SIGOPS Operating Systems Review 58(1), 2024.
% Acronyms in the title are brace-protected so sentence-casing styles keep them uppercase.
@article{logan2024daos,
  author     = {Logan, Luke and Lofstead, Jay and Sun, Xian-He and Kougkas, Anthony},
  title      = {An Evaluation of {DAOS} for Simulation and Deep Learning {HPC} Workloads},
  journal    = {ACM SIGOPS Operating Systems Review},
  volume     = {58},
  number     = {1},
  pages      = {37--44},
  numpages   = {8},
  year       = {2024},
  month      = aug,
  issue_date = {June 2024},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  issn       = {0163-5980},
  doi        = {10.1145/3689051.3689058},
  url        = {https://doi.org/10.1145/3689051.3689058},
  abstract   = {Traditionally, distributed storage systems have relied upon the interfaces provided by OS kernels to interact with storage hardware. However, much research has shown that OSes impose serious overheads on every I/O operation, especially on high-performance storage and networking hardware (e.g., PMEM and 200GBe). Thus, distributed storage stacks are being re-designed to take advantage of this modern hardware by utilizing new hardware interfaces which bypass the kernel entirely. However, the impact of these optimizations have not been well-studied for real HPC workloads on real hardware. In this work, we provide a comprehensive evaluation of DAOS: a state-of-the-art distributed storage system which re-architects the storage stack from scratch for modern hardware. We compare DAOS against traditional storage stacks and demonstrate that by utilizing optimal interfaces to hardware, performance improvements of up to 6x can be observed in real scientific applications.},
}