%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "1.64", %%% date = "02 February 2026", %%% time = "08:44:28 MDT", %%% filename = "tos.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% URL = "https://www.math.utah.edu/~beebe", %%% checksum = "08486 20529 107021 1005305", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "bibliography; BibTeX; ACM Transactions on %%% Storage; TOS", %%% license = "public domain", %%% supported = "yes", %%% docstring = "This is a COMPLETE BibTeX bibliography for %%% ACM Transactions on Storage (CODEN ????, ISSN %%% 1553-3077 (print), 1553-3093 (electronic)), %%% completely covering all issues from volume 1, %%% number 1, February 2005 to date. %%% %%% The ACM maintains World Wide Web pages with %%% journal tables of contents for 2005--date at %%% %%% https://dl.acm.org/loi/tos %%% http://www.acm.org/tos/ %%% http://www.acm.org/pubs/contents/journals/tos/ %%% http://portal.acm.org/browse_dl.cfm?idx=J960 %%% %%% That data has been automatically converted to %%% BibTeX form, corrected for spelling and page %%% number errors, and merged into this file. %%% %%% At version 1.64, the COMPLETE year coverage %%% looks like this: %%% %%% 2005 ( 17) 2013 ( 14) 2021 ( 32) %%% 2006 ( 18) 2014 ( 17) 2022 ( 36) %%% 2007 ( 13) 2015 ( 19) 2023 ( 38) %%% 2008 ( 15) 2016 ( 24) 2024 ( 27) %%% 2009 ( 21) 2017 ( 40) 2025 ( 41) %%% 2010 ( 14) 2018 ( 36) 2026 ( 10) %%% 2011 ( 14) 2019 ( 22) %%% 2012 ( 18) 2020 ( 35) %%% %%% Article: 521 %%% %%% Total entries: 521 %%% %%% Spelling has been verified with the UNIX %%% spell and GNU ispell programs using the %%% exception dictionary stored in the companion %%% file with extension .sok. 
%%% %%% ACM copyrights explicitly permit abstracting %%% with credit, so article abstracts, keywords, %%% and subject classifications have been %%% included in this bibliography wherever %%% available. Article reviews have been %%% omitted, until their copyright status has %%% been clarified. %%% %%% bibsource keys in the bibliography entries %%% below indicate the entry originally came %%% from the computer science bibliography %%% archive, even though it has likely since %%% been corrected and updated. %%% %%% URL keys in the bibliography point to %%% World Wide Web locations of additional %%% information about the entry. %%% %%% BibTeX citation tags are uniformly chosen %%% as name:year:abbrev, where name is the %%% family name of the first author or editor, %%% year is a 4-digit number, and abbrev is a %%% 3-letter condensation of important title %%% words. Citation tags were automatically %%% generated by software developed by the %%% author for the BibNet Project. %%% %%% In this bibliography, entries are sorted %%% by journal, and then by publication order, %%% with the help of ``bibsort -byvolume''. The %%% bibsort utility is available from %%% ftp://ftp.math.utah.edu/pub/tex/bib. %%% %%% The author will be grateful for reports of %%% errors of any kind in this bibliography. %%% %%% The checksum field above contains a CRC-16 %%% checksum as the first value, followed by the %%% equivalent of the standard UNIX wc (word %%% count) utility output of lines, words, and %%% characters. This is produced by Robert %%% Solovay's checksum utility." %%% } %%% ==================================================================== @Preamble{"\input bibnames.sty" # "\hyphenation{ }" # "\ifx \undefined \circled \def \circled #1{(#1)}\fi" # "\ifx \undefined \reg \def \reg {\circled{R}}\fi" } %%% ==================================================================== %%% Acknowledgement abbreviations: @String{ack-nhfb = "Nelson H. F. 
Beebe, University of Utah, Department of Mathematics, 110 LCB, 155 S 1400 E RM 233, Salt Lake City, UT 84112-0090, USA, Tel: +1 801 581 5254, e-mail: \path|beebe@math.utah.edu|, \path|beebe@acm.org|, \path|beebe@computer.org| (Internet), URL: \path|https://www.math.utah.edu/~beebe/|"} %%% ==================================================================== %%% Journal abbreviations: @String{j-TOS = "ACM Transactions on Storage"} %%% ==================================================================== %%% Bibliography entries sorted in publication order: @Article{Rajan:2005:E, author = "Sreeranga P. Rajan", title = "Editorial", journal = j-TOS, volume = "1", number = "1", pages = "1--2", month = feb, year = "2005", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Apr 14 12:33:44 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Yu:2005:CAR, author = "Haifeng Yu and Amin Vahdat", title = "Consistent and automatic replica regeneration", journal = j-TOS, volume = "1", number = "1", pages = "3--37", month = feb, year = "2005", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Apr 14 12:33:44 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Huang:2005:CRK, author = "Andrew C. 
Huang and Armando Fox", title = "Cheap recovery: a key to self-managing state", journal = j-TOS, volume = "1", number = "1", pages = "38--70", month = feb, year = "2005", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Apr 14 12:33:44 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Ellard:2005:DPE, author = "Daniel Ellard and James Megquier", title = "{DISP}: {Practical}, efficient, secure and fault-tolerant distributed data storage", journal = j-TOS, volume = "1", number = "1", pages = "71--94", month = feb, year = "2005", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Apr 14 12:33:44 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hughes:2005:RSR, author = "Gordon F. Hughes and Joseph F. Murray", title = "Reliability and security of {RAID} storage systems and {D2D} archives using {SATA} disk drives", journal = j-TOS, volume = "1", number = "1", pages = "95--107", month = feb, year = "2005", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Apr 14 12:33:44 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wu:2005:TRL, author = "Changxun Wu and Randal Burns", title = "Tunable randomization for load management in shared-disk clusters", journal = j-TOS, volume = "1", number = "1", pages = "108--131", month = feb, year = "2005", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Apr 14 12:33:44 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Sivathanu:2005:ISS, author = "Muthian Sivathanu and Vijayan Prabhakaran and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau", title = "Improving storage system availability with {D-GRAID}", journal = j-TOS, volume = "1", number = "2", pages = "133--170", month = may, year = "2005", CODEN = "????", DOI = "https://doi.org/10.1145/1063786.1063787", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Jul 7 13:56:40 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Jiang:2005:NFS, author = "Anxiao (Andrew) Jiang and Jehoshua Bruck", title = "Network file storage with graceful performance degradation", journal = j-TOS, volume = "1", number = "2", pages = "171--189", month = may, year = "2005", CODEN = "????", DOI = "https://doi.org/10.1145/1063786.1063788", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Jul 7 13:56:40 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Peterson:2005:ETS, author = "Zachary Peterson and Randal Burns", title = "{Ext3cow}: a time-shifting file system for regulatory compliance", journal = j-TOS, volume = "1", number = "2", pages = "190--212", month = may, year = "2005", CODEN = "????", DOI = "https://doi.org/10.1145/1063786.1063789", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Jul 7 13:56:40 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "http://hssl.cs.jhu.edu/~zachary/papers/peterson-tos05.pdf", abstract = "The ext3cow file system, built on the popular ext3 file system, provides an open-source file versioning and snapshot platform for compliance with the versioning and auditability requirements of recent electronic record retention legislation. Ext3cow provides a time-shifting interface that permits a real-time and continuous view of data in the past. Time-shifting does not pollute the file system namespace nor require snapshots to be mounted as a separate file system. 
Further, ext3cow is implemented entirely in the file system space and, therefore, does not modify kernel interfaces or change the operation of other file systems. Ext3cow takes advantage of the fine-grained control of on-disk and in-memory data available only to a file system, resulting in minimal degradation of performance and functionality. Experimental results confirm this hypothesis; ext3cow performs comparably to ext3 on many benchmarks and on trace-driven experiments.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Li:2005:MBC, author = "Zhenmin Li and Zhifeng Chen and Yuanyuan Zhou", title = "Mining block correlations to improve storage performance", journal = j-TOS, volume = "1", number = "2", pages = "213--245", month = may, year = "2005", CODEN = "????", DOI = "https://doi.org/10.1145/1063786.1063790", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Jul 7 13:56:40 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Garg:2005:BDD, author = "Nitin Garg and Sumeet Sobti and Junwen Lai and Fengzhou Zheng and Kai Li and Randolph Y. 
Wang and Arvind Krishnamurthy", title = "Bridging the digital divide: storage media $+$ postal network $=$ generic high-bandwidth communication", journal = j-TOS, volume = "1", number = "2", pages = "246--275", month = may, year = "2005", CODEN = "????", DOI = "https://doi.org/10.1145/1063786.1063791", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Jul 7 13:56:40 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Making high-bandwidth Internet access pervasively available to a large worldwide audience is a difficult challenge, especially in many developing regions. As we wait for the uncertain takeoff of technologies that promise to improve the situation, we propose to explore an approach that is potentially more easily realizable: the use of digital storage media transported by the postal system as a general digital communication mechanism. We shall call such a system a Postmanet. Compared to more conventional wide-area connectivity options, the Postmanet has several important advantages, including wide global reach, great bandwidth potential, low cost, and ease of incremental adoption. While the idea of sending digital content via the postal system is not a new one, none of the existing attempts have turned the postal system into a generic and transparent communication channel that not only can cater to a wide array of applications, but also effectively manage the many idiosyncrasies associated with using the postal system. In the proposed Postmanet, we see two recurring themes at many different levels of the system. One is the simultaneous exploitation of the Internet and the postal system so we can combine their latency and bandwidth advantages. The other is the exploitation of the abundant capacity and bandwidth of the Postmanet to improve its latency, cost, and reliability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Ganesan:2005:MSS, author = "Deepak Ganesan and Ben Greenstein and Deborah Estrin and John Heidemann and Ramesh Govindan", title = "Multiresolution storage and search in sensor networks", journal = j-TOS, volume = "1", number = "3", pages = "277--315", month = aug, year = "2005", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 17 15:49:46 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Seo:2005:EDR, author = "Beomjoo Seo and Roger Zimmermann", title = "Efficient disk replacement and data migration algorithms for large disk subsystems", journal = j-TOS, volume = "1", number = "3", pages = "316--345", month = aug, year = "2005", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 17 15:49:46 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Li:2005:PDE, author = "Xiaodong Li and Zhenmin Li and Yuanyuan Zhou and Sarita Adve", title = "Performance directed energy management for main memory and disks", journal = j-TOS, volume = "1", number = "3", pages = "346--380", month = aug, year = "2005", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 17 15:49:46 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Chang:2005:EML, author = "Li-Pin Chang and Tei-Wei Kuo", title = "Efficient management for large-scale flash-memory storage systems with resource conservation", journal = j-TOS, volume = "1", number = "4", pages = "381--418", month = nov, year = "2005", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri May 26 08:38:08 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Anastasiadis:2005:SFT, author = "Stergios V. Anastasiadis and Kenneth C. 
Sevcik and Michael Stumm", title = "Scalable and fault-tolerant support for variable bit-rate data in the {Exedra} streaming server", journal = j-TOS, volume = "1", number = "4", pages = "419--456", month = nov, year = "2005", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri May 26 08:38:08 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Karlsson:2005:TPD, author = "Magnus Karlsson and Christos Karamanolis and Xiaoyun Zhu", title = "{Triage}: Performance differentiation for storage systems using adaptive control", journal = j-TOS, volume = "1", number = "4", pages = "457--480", month = nov, year = "2005", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri May 26 08:38:08 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hong:2006:UMBa, author = "Bo Hong and Feng Wang and Scott A. Brandt and Darrell D. E. Long and Thomas J. E. {Schwarz, S. J.}", title = "Using {MEMS}-based storage in computer systems---{MEMS} storage architectures", journal = j-TOS, volume = "2", number = "1", pages = "1--21", month = feb, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Aug 23 05:41:22 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hsieh:2006:EIH, author = "Jen-Wei Hsieh and Tei-Wei Kuo and Li-Pin Chang", title = "Efficient identification of hot data for flash memory storage systems", journal = j-TOS, volume = "2", number = "1", pages = "22--40", month = feb, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Aug 23 05:41:22 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Gurumurthi:2006:TID, author = "Sudhanva Gurumurthi and Anand Sivasubramaniam", title = "Thermal issues in disk drive design: Challenges and possible solutions", journal = j-TOS, volume = "2", number = "1", pages = "41--73", month = feb, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Aug 23 05:41:22 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wright:2006:VUS, author = "Charles P. Wright and Jay Dave and Puja Gupta and Harikesavan Krishnan and David P. 
Quigley and Erez Zadok and Mohammad Nayyer Zubair", title = "Versatility and {Unix} semantics in namespace unification", journal = j-TOS, volume = "2", number = "1", pages = "74--105", month = feb, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1138041.1138045", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Aug 23 05:41:22 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Administrators often prefer to keep related sets of files in different locations or media, as it is easier to maintain them separately. Users, however, prefer to see all files in one location for convenience. One solution that accommodates both needs is virtual namespace unification---providing a merged view of several directories without physically merging them. For example, namespace unification can merge the contents of several CD-ROM images without unpacking them, merge binary directories from different packages, merge views from several file servers, and more. Namespace unification can also enable snapshotting by marking some data sources read-only and then utilizing copy-on-write for the read-only sources. For example, an OS image may be contained on a read-only CD-ROM image---and the user's configuration, data, and programs could be stored in a separate read-write directory. With copy-on-write unification, the user need not be concerned about the two disparate file systems. It is difficult to maintain Unix semantics while offering a versatile namespace unification system. Past efforts to provide such unification often compromised on the set of features provided or Unix compatibility---resulting in an incomplete solution that users could not use. We designed and implemented a versatile namespace unification system called Unionfs. 
Unionfs maintains Unix semantics while offering advanced namespace unification features: dynamic insertion and removal of namespaces at any point in the merged view, mixing read-only and read-write components, efficient in-kernel duplicate elimination, NFS interoperability, and more. Since releasing our Linux implementation, it has been used by thousands of users and over a dozen Linux distributions, which helped us discover and solve many practical problems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Mykletun:2006:AIO, author = "Einar Mykletun and Maithili Narasimha and Gene Tsudik", title = "Authentication and integrity in outsourced databases", journal = j-TOS, volume = "2", number = "2", pages = "107--138", month = may, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Aug 23 05:41:22 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hong:2006:UMBb, author = "Bo Hong and Scott A. Brandt and Darrell D. E. Long and Ethan L. Miller and Ying Lin", title = "Using {MEMS}-based storage in computer systems---device modeling and management", journal = j-TOS, volume = "2", number = "2", pages = "139--160", month = may, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Aug 23 05:41:22 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zadok:2006:IFS, author = "Erez Zadok and Rakesh Iyer and Nikolai Joukov and Gopalan Sivathanu and Charles P. Wright", title = "On incremental file system development", journal = j-TOS, volume = "2", number = "2", pages = "161--196", month = may, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Aug 23 05:41:22 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Sugahara:2006:SMB, author = "Satoshi Sugahara and Masaaki Tanaka", title = "Spin {MOSFETs} as a basis for spintronics", journal = j-TOS, volume = "2", number = "2", pages = "197--219", month = may, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Aug 23 05:41:22 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Vazhkudai:2006:CCD, author = "Sudharshan S. Vazhkudai and Xiaosong Ma and Vincent W. Freeh and Jonathan W. Strickland and Nandan Tammineedi and Tyler Simon and Stephen L. 
Scott", title = "Constructing collaborative desktop storage caches for large scientific datasets", journal = j-TOS, volume = "2", number = "3", pages = "221--254", month = aug, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Apr 14 11:04:31 MDT 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Won:2006:ISC, author = "Youjip Won and Hyungkyu Chang and Jaemin Ryu and Yongdai Kim and Junseok Shim", title = "Intelligent storage: Cross-layer optimization for soft real-time workload", journal = j-TOS, volume = "2", number = "3", pages = "255--282", month = aug, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Apr 14 11:04:31 MDT 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zhang:2006:SPV, author = "Jianyong Zhang and Anand Sivasubramaniam and Qian Wang and Alma Riska and Erik Riedel", title = "Storage performance virtualization via throughput and latency control", journal = j-TOS, volume = "2", number = "3", pages = "283--308", month = aug, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Apr 14 11:04:31 MDT 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wang:2006:CFS, author = "An-I Andy Wang and Geoff Kuenning and Peter Reiher and Gerald Popek", title = "The {{\em Conquest\/}} file system: Better performance through a disk\slash persistent-{RAM} hybrid design", journal = j-TOS, volume = "2", number = "3", pages = "309--348", month = aug, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Apr 14 11:04:31 MDT 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Memik:2006:MTE, author = "Gokhan Memik and Mahmut T. Kandemir and Wei-Keng Liao and Alok Choudhary", title = "Multicollective {I/O}: a technique for exploiting inter-file access patterns", journal = j-TOS, volume = "2", number = "3", pages = "349--369", month = aug, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Apr 14 11:04:31 MDT 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Kang:2006:AVA, author = "Sukwoo Kang and A. L. 
Narasimha Reddy", title = "An approach to virtual allocation in storage systems", journal = j-TOS, volume = "2", number = "4", pages = "371--399", month = nov, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Apr 14 11:04:31 MDT 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Nijim:2006:MIS, author = "Mais Nijim and Xiao Qin and Tao Xie", title = "Modeling and improving security of a local disk system for write-intensive workloads", journal = j-TOS, volume = "2", number = "4", pages = "400--423", month = nov, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Apr 14 11:04:31 MDT 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Bobbarjung:2006:IDE, author = "Deepak R. Bobbarjung and Suresh Jagannathan and Cezary Dubnicki", title = "Improving duplicate elimination in storage systems", journal = j-TOS, volume = "2", number = "4", pages = "424--448", month = nov, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Apr 14 11:04:31 MDT 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wu:2006:DEI, author = "Chin-Hsien Wu and Tei-Wei Kuo and Li-Pin Chang", title = "The design of efficient initialization and crash recovery for log-based file systems over flash memory", journal = j-TOS, volume = "2", number = "4", pages = "449--467", month = nov, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Apr 14 11:04:31 MDT 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Lin:2006:EID, author = "Song Lin and Demetrios Zeinalipour-Yazti and Vana Kalogeraki and Dimitrios Gunopulos and Walid A. Najjar", title = "Efficient indexing data structures for flash-based sensor devices", journal = j-TOS, volume = "2", number = "4", pages = "468--503", month = nov, year = "2006", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Apr 14 11:04:31 MDT 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Arnan:2007:DDR, author = "Ron Arnan and Eitan Bachmat and Tao Kai Lam and Ruben Michel", title = "Dynamic data reallocation in disk arrays", journal = j-TOS, volume = "3", number = "1", pages = "??--??", month = mar, year = "2007", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Apr 14 11:04:31 MDT 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Kim:2007:ZR, author = "Seon Ho Kim and Hong Zhu and Roger Zimmermann", title = "Zoned-{RAID}", journal = j-TOS, volume = "3", number = "1", pages = "??--??", month = mar, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1227835.1227836", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Apr 14 11:04:31 MDT 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The RAID (Redundant Array of Inexpensive Disks) system has been widely used in practical storage applications for better performance, cost effectiveness, and reliability. This study proposes a novel variant of RAID named Zoned-RAID (Z-RAID). Z-RAID improves the performance of traditional RAID by utilizing the zoning property of modern disks which provides multiple zones with different data transfer rates within a disk. Z-RAID levels 1, 5, and 6 are introduced to enhance the effective data transfer rate of RAID levels 1, 5, and 6, respectively, by constraining the placement of data blocks in multizone disks. 
We apply the Z-RAID to a practical and popular application, streaming media server, that requires a high-data transfer rate as well as a high reliability. The analytical and experimental results demonstrate the superiority of Z-RAID to conventional RAID. Z-RAID provides a higher effective data transfer rate in normal mode with no disadvantage. In the presence of a disk failure, Z-RAID still performs as well as RAID.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zhang:2007:SEA, author = "Guangyan Zhang and Jiwu Shu and Wei Xue and Weimin Zheng", title = "{SLAS}: An efficient approach to scaling round-robin striped volumes", journal = j-TOS, volume = "3", number = "1", pages = "??--??", month = mar, year = "2007", CODEN = "????", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Apr 14 11:04:31 MDT 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wright:2007:EAS, author = "Charles P. Wright and Richard Spillane and Gopalan Sivathanu and Erez Zadok", title = "Extending {ACID} semantics to the file system", journal = j-TOS, volume = "3", number = "2", pages = "4:1--4:??", month = jun, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1242520.1242521", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:16 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "An organization's data is often its most valuable asset, but today's file systems provide few facilities to ensure its safety. 
Databases, on the other hand, have long provided transactions. Transactions are useful because they provide atomicity, consistency, isolation, and durability (ACID). Many applications could make use of these semantics, but databases have a wide variety of nonstandard interfaces. For example, applications like mail servers currently perform elaborate error handling to ensure atomicity and consistency, because it is easier than using a DBMS. A transaction-oriented programming model eliminates complex error-handling code because failed operations can simply be aborted without side effects. We have designed a file system that exports ACID transactions to user-level applications, while preserving the ubiquitous and convenient POSIX interface. In our prototype ACID file system, called Amino, updated applications can protect arbitrary sequences of system calls within a transaction. Unmodified applications operate without any changes, but each system call is transaction protected. We also built a recoverable memory library with support for nested transactions to allow applications to keep their in-memory data structures consistent with the file system. Our performance evaluation shows that ACID semantics can be added to applications with acceptable overheads. When Amino adds atomicity, consistency, and isolation functionality to an application, it performs close to Ext3. Amino achieves durability up to 46\% faster than Ext3, thanks to improved locality.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "databases; file system transactions; file systems; ptrace monitors; recoverable memory", } @Article{Ding:2007:BCM, author = "Xiaoning Ding and Song Jiang and Feng Chen", title = "A buffer cache management scheme exploiting both temporal and spatial localities", journal = j-TOS, volume = "3", number = "2", pages = "5:1--5:??", month = jun, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1242520.1242522", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:16 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "On-disk sequentiality of requested blocks, or their spatial locality, is critical to real disk performance where the throughput of access to sequentially-placed disk blocks can be an order of magnitude higher than that of access to randomly-placed blocks. Unfortunately, spatial locality of cached blocks is largely ignored, and only temporal locality is considered in current system buffer cache managements. Thus, disk performance for workloads without dominant sequential accesses can be seriously degraded. To address this problem, we propose a scheme called DULO (DUal LOcality) which exploits both temporal and spatial localities in the buffer cache management. Leveraging the filtering effect of the buffer cache, DULO can influence the I/O request stream by making the requests passed to the disk more sequential, thus significantly increasing the effectiveness of I/O scheduling and prefetching for disk performance improvements.\par We have implemented a prototype of DULO in Linux 2.6.11. The implementation shows that DULO can significantly increase disk I/O throughput for real-world applications such as a Web server, TPC benchmark, file system benchmark, and scientific programs. 
It reduces their execution times by as much as 53\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "caching; file systems; hard disk; spatial locality; temporal locality", } @Article{Rangaswami:2007:BMB, author = "Raju Rangaswami and Zoran Dimitrijevi{\'c} and Edward Chang and Klaus Schauser", title = "Building {MEMS}-based storage systems for streaming media", journal = j-TOS, volume = "3", number = "2", pages = "6:1--6:??", month = jun, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1242520.1242523", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:16 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The performance of streaming media servers has been limited by the dual requirements of high disk throughput (to service more clients simultaneously) and low memory use (to decrease system cost). To achieve high disk throughput, disk drives must be accessed with large IOs to amortize disk access overhead. Large IOs imply an increased requirement of expensive DRAM, and, consequently, greater overall system cost. MEMS-based storage, an emerging storage technology, is predicted to offer a price-performance point between those of DRAM and disk drives. In this study, we propose storage architectures that use the relatively inexpensive MEMS-based storage devices as an intermediate layer (between DRAM and disk drives) for temporarily staging large disk IOs at a significantly lower cost. We present data layout mechanisms and synchronized IO scheduling algorithms for the real-time storage and retrieval of streaming data within such an augmented storage system. 
Analytical evaluation suggests that MEMS-augmented storage hierarchies can reduce the cost and improve the throughput of streaming servers significantly.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "I/O scheduling; MEMS-based storage; multidisk storage; storage architecture; streaming media", } @Article{Arpaci-Dusseau:2007:ISI, author = "Andrea Arpaci-Dusseau and Remzi Arpaci-Dusseau", title = "Introduction to special issue {USENIX} {FAST} 2007", journal = j-TOS, volume = "3", number = "3", pages = "7:1--7:??", month = oct, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1288783.1288784", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:25 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Schroeder:2007:UDF, author = "Bianca Schroeder and Garth A. 
Gibson", title = "Understanding disk failure rates: What does an {MTTF} of 1,000,000 hours mean to you?", journal = j-TOS, volume = "3", number = "3", pages = "8:1--8:??", month = oct, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1288783.1288785", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:25 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Component failure in large-scale IT installations is becoming an ever-larger problem as the number of components in a single cluster approaches a million.\par This article is an extension of our previous study on disk failures [Schroeder and Gibson 2007] and presents and analyzes field-gathered disk replacement data from a number of large production systems, including high-performance computing sites and internet services sites. More than 110,000 disks are covered by this data, some for an entire lifetime of five years. The data includes drives with SCSI and FC, as well as SATA interfaces. The mean time-to-failure (MTTF) of those drives, as specified in their datasheets, ranges from 1,000,000 to 1,500,000 hours, suggesting a nominal annual failure rate of at most 0.88\%.\par We find that in the field, annual disk replacement rates typically exceed 1\%, with 2--4\% common and up to 13\% observed on some systems. This suggests that field replacement is a fairly different process than one might predict based on datasheet MTTF.\par We also find evidence, based on records of disk replacements in the field, that failure rate is not constant with age, and that rather than a significant infant mortality effect, we see a significant early onset of wear-out degradation. 
In other words, the replacement rates in our data grew constantly with age, an effect often assumed not to set in until after a nominal lifetime of 5 years.\par Interestingly, we observe little difference in replacement rates between SCSI, FC, and SATA drives, potentially an indication that disk-independent factors such as operating conditions affect replacement rates more than component-specific ones. On the other hand, we see only one instance of a customer rejecting an entire population of disks as a bad batch, in this case because of media error rates, and this instance involved SATA disks.\par Time between replacement, a proxy for time between failure, is not well modeled by an exponential distribution and exhibits significant levels of correlation, including autocorrelation and long-range dependence.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "annual failure rates; annual replacement rates; datasheet MTTF; failure correlation; hard drive failure; hard drive replacements; infant mortality; MTTF; storage reliability; time between failure; wear-out", } @Article{Agrawal:2007:FYS, author = "Nitin Agrawal and William J. Bolosky and John R. Douceur and Jacob R. Lorch", title = "A five-year study of file-system metadata", journal = j-TOS, volume = "3", number = "3", pages = "9:1--9:??", month = oct, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1288783.1288788", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:25 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "For five years, we collected annual snapshots of file-system metadata from over 60,000 Windows PC file systems in a large corporation. 
In this article, we use these snapshots to study temporal changes in file size, file age, file-type frequency, directory size, namespace structure, file-system population, storage capacity and consumption, and degree of file modification. We present a generative model that explains the namespace structure and the distribution of directory sizes. We find significant temporal trends relating to the popularity of certain file types, the origin of file content, the way the namespace is used, and the degree of variation among file systems, as well as more pedestrian changes in size and capacities. We give examples of consequent lessons for designers of file systems and related software.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "file systems; generative model; longitudinal study", } @Article{Gill:2007:OMS, author = "Binny S. Gill and Luis Angel D. Bathen", title = "Optimal multistream sequential prefetching in a shared cache", journal = j-TOS, volume = "3", number = "3", pages = "10:1--10:??", month = oct, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1288783.1288789", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:25 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Prefetching is a widely used technique in modern data storage systems. We study the most widely used class of prefetching algorithms known as sequential prefetching. 
There are two problems that plague the state-of-the-art sequential prefetching algorithms: (i) cache pollution, which occurs when prefetched data replaces more useful prefetched or demand-paged data, and (ii) prefetch wastage, which happens when prefetched data is evicted from the cache before it can be used.\par A sequential prefetching algorithm can have a fixed or adaptive degree of prefetch and can be either synchronous (when it can prefetch only on a miss) or asynchronous (when it can also prefetch on a hit). To capture these distinctions we define four classes of prefetching algorithms: fixed synchronous (FS), fixed asynchronous (FA), adaptive synchronous (AS), and adaptive asynchronous (AsynchA). We find that the relatively unexplored class of AsynchA algorithms is in fact the most promising for sequential prefetching. We provide a first formal analysis of the criteria necessary for optimal throughput when using an AsynchA algorithm in a cache shared by multiple steady sequential streams. We then provide a simple implementation called AMP (adaptive multistream prefetching) which adapts accordingly, leading to near-optimal performance for any kind of sequential workload and cache size.\par Our experimental setup consisted of an IBM xSeries 345 dual processor server running Linux using five SCSI disks. We observe that AMP convincingly outperforms all the contending members of the FA, FS, and AS classes for any number of streams and over all cache sizes. As anecdotal evidence, in an experiment with 100 concurrent sequential streams and varying cache sizes, AMP surpasses the FA, FS, and AS algorithms by 29--172\%, 12--24\%, and 21--210\%, respectively, while outperforming OBL by a factor of 8. Even for complex workloads like SPC1-Read, AMP is consistently the best-performing algorithm. For the SPC2 video-on-demand workload, AMP can sustain at least 25\% more streams than the next best algorithm. 
Furthermore, for a workload consisting of short sequences, where optimality is more elusive, AMP is able to outperform all the other contenders in overall performance.\par Finally, we implemented AMP in the state-of-the-art enterprise storage system, the IBM system storage DS8000 series. We demonstrated that AMP dramatically improves performance for common sequential and batch processing workloads and delivers up to a twofold increase in the sequential read capacity.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "adaptive prefetching; asynchronous prefetching; cache pollution; degree of prefetch; fixed prefetching; multistream read; optimal prefetching; prefetch wastage; prestaging; sequential prefetching; synchronous prefetching; trigger distance", } @Article{Yumerefendi:2007:SAN, author = "Aydan R. Yumerefendi and Jeffrey S. Chase", title = "Strong accountability for network storage", journal = j-TOS, volume = "3", number = "3", pages = "11:1--11:??", month = oct, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1288783.1288786", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:25 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "This article presents the design, implementation, and evaluation of CATS, a network storage service with strong accountability properties. CATS offers a simple web services interface that allows clients to read and write opaque objects of variable size. This interface is similar to the one offered by existing commercial Internet storage services. 
CATS extends the functionality of commercial Internet storage services by offering support for strong accountability.\par A CATS server annotates read and write responses with evidence of correct execution, and offers audit and challenge interfaces that enable clients to verify that the server is faithful. A faulty server cannot conceal its misbehavior, and evidence of misbehavior is independently verifiable by any participant. CATS clients are also accountable for their actions on the service. A client cannot deny its actions, and the server can prove the impact of those actions on the state views it presented to other clients.\par Experiments with a CATS prototype evaluate the cost of accountability under a range of conditions and expose the primary factors influencing the level of assurance and the performance of a strongly accountable storage server. The results show that strong accountability is practical for network storage systems in settings with strong identity and modest degrees of write-sharing. We discuss how the accountability concepts and techniques used in CATS generalize to other classes of network services.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "accountability; accountable services; accountable storage", } @Article{Cipar:2007:CSU, author = "James Cipar and Mark D. Corner and Emery D. 
Berger", title = "Contributing storage using the transparent file system", journal = j-TOS, volume = "3", number = "3", pages = "12:1--12:??", month = oct, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1288783.1288787", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:25 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Contributory applications allow users to donate unused resources on their personal computers to a shared pool. Applications such as SETI@home, Folding@home, and Freenet are now in wide use and provide a variety of services, including data processing and content distribution. However, while several research projects have proposed contributory applications that support peer-to-peer storage systems, their adoption has been comparatively limited. We believe that a key barrier to the adoption of contributory storage systems is that contributing a large quantity of local storage interferes with the principal user of the machine.\par To overcome this barrier, we introduce the Transparent File System (TFS). TFS provides background tasks with large amounts of unreliable storage --- all of the currently available space --- without impacting the performance of ordinary file access operations. We show that TFS allows a peer-to-peer contributory storage system to provide 40\% more storage at twice the performance when compared to a user-space storage mechanism. We analyze the impact of TFS on replication in peer-to-peer storage systems and show that TFS does not appreciably increase the resources needed for file replication.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "aging; contributory systems; fragmentation; peer-to-peer", } @Article{Weddle:2007:PGS, author = "Charles Weddle and Mathew Oldham and Jin Qian and An-I Andy Wang and Peter Reiher and Geoff Kuenning", title = "{PARAID}: a gear-shifting power-aware {RAID}", journal = j-TOS, volume = "3", number = "3", pages = "13:1--13:??", month = oct, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1288783.1288790", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:25 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Reducing power consumption for server-class computers is important, since increased energy usage causes more heat dissipation, greater cooling requirements, reduced computational density, and higher operating costs. For a typical data center, storage accounts for 27\% of energy consumption. Conventional server-class RAIDs cannot easily reduce power because loads are balanced to use all disks, even for light loads.\par We have built the power-aware RAID (PARAID), which reduces energy use of commodity server-class disks without specialized hardware. PARAID uses a skewed striping pattern to adapt to the system load by varying the number of powered disks. By spinning disks down during light loads, PARAID can reduce power consumption, while still meeting performance demands, by matching the number of powered disks to the system load. Reliability is achieved by limiting disk power cycles and using different RAID encoding schemes. Based on our five-disk prototype, PARAID uses up to 34\% less power than conventional RAIDs while achieving similar performance and reliability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "energy efficiency; power savings; RAID", } @Article{Maccormick:2008:NPR, author = "John Maccormick and Chandramohan A. Thekkath and Marcus Jager and Kristof Roomp and Lidong Zhou and Ryan Peterson", title = "Niobe: a practical replication protocol", journal = j-TOS, volume = "3", number = "4", pages = "1:1--1:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1326542.1326543", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:37 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The task of consistently and reliably replicating data is fundamental in distributed systems, and numerous existing protocols are able to achieve such replication efficiently. When called on to build a large-scale enterprise storage system with built-in replication, we were therefore surprised to discover that no existing protocols met our requirements. As a result, we designed and deployed a new replication protocol called Niobe. Niobe is in the primary-backup family of protocols, and shares many similarities with other protocols in this family. But we believe Niobe is significantly more practical for large-scale enterprise storage than previously published protocols. In particular, Niobe is simple, flexible, has rigorously proven yet simply stated consistency guarantees, and exhibits excellent performance. Niobe has been deployed as the backend for a commercial Internet service; its consistency properties have been proved formally from first principles, and further verified using the TLA + specification language. We describe the protocol itself, the system built to deploy it, and some of our experiences in doing so.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "enterprise storage; replication", } @Article{Rodeh:2008:BTS, author = "Ohad Rodeh", title = "{B}-trees, shadowing, and clones", journal = j-TOS, volume = "3", number = "4", pages = "2:1--2:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1326542.1326544", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:37 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "B-trees are used by many file systems to represent files and directories. They provide guaranteed logarithmic time key-search, insert, and remove. File systems like WAFL and ZFS use shadowing, or copy-on-write, to implement snapshots, crash recovery, write-batching, and RAID. Serious difficulties arise when trying to use B-trees and shadowing in a single system.\par This article is about a set of B-tree algorithms that respects shadowing, achieves good concurrency, and implements cloning (writable snapshots). Our cloning algorithm is efficient and allows the creation of a large number of clones.\par We believe that using our B-trees would allow shadowing file systems to better scale their on-disk data structures.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "B-trees; concurrency; copy-on-write; shadowing; snapshots", } @Article{Dutta:2008:WBG, author = "Kaushik Dutta and Raju Rangaswami and Sajib Kundu", title = "Workload-based generation of administrator hints for optimizing database storage utilization", journal = j-TOS, volume = "3", number = "4", pages = "3:1--3:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1326542.1326545", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:37 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Database storage management at data centers is a manual, time-consuming, and error-prone task. Such management involves regular movement of database objects across storage nodes in an attempt to balance the I/O bandwidth utilization across disk drives. Achieving such balance is critical for avoiding I/O bottlenecks and thereby maximizing the utilization of the storage system. However, manual management of the aforesaid task, apart from increasing administrative costs, encumbers the greater risks of untimely and erroneous operations. We address the preceding concerns with STORM, an automated approach that combines low-overhead information gathering of database access and storage usage patterns with efficient analysis to generate accurate and timely hints for the administrator regarding data movement operations. STORM's primary objective is minimizing the volume of data movement required (to minimize potential down-time or reduction in performance) during the reconfiguration operation, with the secondary constraints of space and balanced I/O-bandwidth-utilization across the storage devices. We analyze and evaluate STORM theoretically, using a simulation framework, as well as experimentally. 
We show that the dynamic data layout reconfiguration problem is NP-hard and we present a heuristic that provides an approximate solution in $ O(N \log (N / M) + (N / M)^2) $ time, where $M$ is the number of storage devices and $N$ is the total number of database objects residing in the storage devices. A simulation study shows that the heuristic converges to an acceptable solution that is successful in balancing storage utilization with an accuracy that lies within 7\% of the ideal solution. Finally, an experimental study demonstrates that the STORM approach can improve the overall performance of the TPC-C benchmark by as much as 22\%, by reconfiguring an initial random, but evenly distributed, placement of database objects.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Verma:2008:UBU, author = "Akshat Verma and Rohit Jain and Sugata Ghosal", title = "A utility-based unified disk scheduling framework for shared mixed-media services", journal = j-TOS, volume = "3", number = "4", pages = "4:1--4:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1326542.1326546", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:37 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "We present a new disk scheduling framework to address the needs of a shared multimedia service that provides differentiated multilevel quality-of-service for mixed-media workloads. In such a shared service, requests from different users have different associated performance objectives and utilities, in accordance with the negotiated service-level agreements (SLAs). 
Service providers typically provision resources only for average workload intensity, so it becomes important to handle workload surges in a way that maximizes the utility of the served requests.\par We capture the performance objectives and utilities associated with these multiclass diverse workloads in a unified framework and formulate the disk scheduling problem as a reward maximization problem. We map the reward maximization problem to a minimization problem on graphs and, by novel use of graph-theoretic techniques, design a scheduling algorithm that is computationally efficient and optimal in the class of seek-optimizing algorithms. Comprehensive experimental studies demonstrate that the proposed algorithm outperforms other disk schedulers under all loads, with the performance improvement approaching 100\% under certain high load conditions. In contrast to existing schedulers, the proposed scheduler is extensible to new performance objectives (workload type) and utilities by simply altering the reward functions associated with the requests.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "disk scheduling; GSP; profit maximization; shortest path", } @Article{Hildrum:2008:SOL, author = "Kirsten Hildrum and Fred Douglis and Joel L. Wolf and Philip S. 
Yu and Lisa Fleischer and Akshay Katta", title = "Storage optimization for large-scale distributed stream-processing systems", journal = j-TOS, volume = "3", number = "4", pages = "5:1--5:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1326542.1326547", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:37 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "We consider storage in an extremely large-scale distributed computer system designed for stream processing applications. In such systems, both incoming data and intermediate results may need to be stored to enable analyses at unknown future times. The quantity of data of potential use would dominate even the largest storage system. Thus, a mechanism is needed to keep the data most likely to be used. One recently introduced approach is to employ retention value functions, which effectively assign each data object a value that changes over time in a prespecified way [Douglis et al. 2004]. Storage space for data entering the system is reclaimed automatically by deleting data of the lowest current value. In such large systems, there will naturally be multiple file systems available, each with different properties. Choosing the right file system for a given incoming stream of data presents a challenge. In this article we provide a novel and effective scheme for optimizing the placement of data within a distributed storage subsystem employing retention value functions. The goal is to keep the data of highest overall value, while simultaneously balancing the read load to the file system. The key aspects of such a scheme are quite different from those that arise in traditional file assignment problems.
We further motivate this optimization problem and describe a solution, comparing its performance to other reasonable schemes via simulation experiments.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "file assignment problem; load balancing; optimization; storage management; streaming systems; theory", } @Article{Dholakia:2008:NID, author = "Ajay Dholakia and Evangelos Eleftheriou and Xiao-Yu Hu and Ilias Iliadis and Jai Menon and K. K. Rao", title = "A new intra-disk redundancy scheme for high-reliability {RAID} storage systems in the presence of unrecoverable errors", journal = j-TOS, volume = "4", number = "1", pages = "1:1--1:??", month = may, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1353452.1353453", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:45 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Today's data storage systems are increasingly adopting low-cost disk drives that have higher capacity but lower reliability, leading to more frequent rebuilds and to a higher risk of unrecoverable media errors. We propose an efficient intradisk redundancy scheme to enhance the reliability of RAID systems. This scheme introduces an additional level of redundancy inside each disk, on top of the RAID redundancy across multiple disks. The RAID parity provides protection against disk failures, whereas the proposed scheme aims to protect against media-related unrecoverable errors. In particular, we consider an intradisk redundancy architecture that is based on an interleaved parity-check coding scheme, which incurs only negligible I/O performance degradation. 
A comparison between this coding scheme and schemes based on traditional Reed--Solomon codes and single-parity-check codes is conducted by analytical means. A new model is developed to capture the effect of correlated unrecoverable sector errors. The probability of an unrecoverable failure associated with these schemes is derived for the new correlated model, as well as for the simpler independent error model. We also derive closed-form expressions for the mean time to data loss of RAID-5 and RAID-6 systems in the presence of unrecoverable errors and disk failures. We then combine these results to characterize the reliability of RAID systems that incorporate the intradisk redundancy scheme. Our results show that in the practical case of correlated errors, the interleaved parity-check scheme provides the same reliability as the optimum, albeit more complex, Reed--Solomon coding scheme. Finally, the I/O and throughput performances are evaluated by means of analysis and event-driven simulation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "file and I/O systems; RAID; reliability analysis; stochastic modeling", } @Article{Essary:2008:PDG, author = "David Essary and Ahmed Amer", title = "Predictive data grouping: Defining the bounds of energy and latency reduction through predictive data grouping and replication", journal = j-TOS, volume = "4", number = "1", pages = "2:1--2:??", month = may, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1353452.1353454", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:45 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "We demonstrate that predictive grouping is an effective mechanism for reducing disk arm movement, thereby simultaneously reducing energy consumption and data access latency. We further demonstrate that predictive grouping has untapped dramatic potential to further improve access performance and limit energy consumption. Data retrieval latencies are considered a major bottleneck, and with growing volumes of data and increased storage needs it is only growing in significance. Data storage infrastructure is therefore a growing consumer of energy at data-center scales, while the individual disk is already a significant concern for mobile computing (accounting for almost a third of a mobile system's energy demands). While improving responsiveness of storage subsystems and hence reducing latencies in data retrieval is often considered contradictory with efforts to reduce disk energy consumption, we demonstrate that predictive data grouping has the potential to simultaneously work towards both these goals. Predictive data grouping has advantages in its applicability compared to both prior approaches to reducing latencies and to reducing energy usage. 
For latencies, grouping can be performed opportunistically, thereby avoiding the serious performance penalties that can be incurred with prior applications of access prediction (such as predictive prefetching of data). For energy, we show how predictive grouping can even save energy use for an individual disk that is never idle.\par Predictive data grouping with effective replication results in a reduction of the overall mechanical movement required to retrieve data. We have built upon our detailed measurements of disk power consumption, and have estimated both the energy expended by a hard disk for its mechanical components, and that needed to move the disk arm. We have further compared, via simulation, three models of predictive grouping of on-disk data, including an optimal arrangement of data that is guaranteed to minimize disk arm movement. These experiments have allowed us to measure the limits of performance improvement achievable with optimal data grouping and replication strategies on a single device, and have further allowed us to demonstrate the potential of such schemes to reduce energy consumption of mechanical components by up to 70\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "data grouping; latency; layout optimization; power; power management; replication", } @Article{Tran:2008:NAD, author = "Dinh Nguyen Tran and Phung Chinh Huynh and Y. C. Tay and Anthony K. H. 
Tung", title = "A new approach to dynamic self-tuning of database buffers", journal = j-TOS, volume = "4", number = "1", pages = "3:1--3:??", month = may, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1353452.1353455", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:45 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Current businesses rely heavily on efficient access to their databases. Manual tuning of these database systems by performance experts is increasingly infeasible: For small companies, hiring an expert may be too expensive; for large enterprises, even an expert may not fully understand the interaction between a large system and its multiple changing workloads. This trend has led major vendors to offer tools that automatically and dynamically tune a database system.\par Many database tuning knobs concern the buffer pool for caching data and disk pages. Specifically, these knobs control the buffer allocation and thus the cache miss probability, which has direct impact on performance.\par Previous methods for automatic buffer tuning are based on simulation, black-box control, gradient descent, and empirical equations. This article presents a new approach, using calculations with an analytically-derived equation that relates miss probability to buffer allocation; this equation fits four buffer replacement policies, as well as twelve datasets from mainframes running commercial databases in large corporations.\par The equation identifies a buffer-size limit that is useful for buffer tuning and powering down idle buffers. It can also replace simulation in predicting I/O costs. Experiments with PostgreSQL illustrate how the equation can help optimize online buffer partitioning, ensure fairness in buffer reclamation, and dynamically retune the allocation when workloads change. 
It is also used, in conjunction with DB2's interface for retrieving miss data, for tuning DB2 buffer allocation to achieve targets for differentiated service.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "autonomic computing; buffer allocation; miss probability", } @Article{Matthews:2008:ITM, author = "Jeanna Matthews and Sanjeev Trika and Debra Hensgen and Rick Coulson and Knut Grimsrud", title = "Intel{\reg} Turbo Memory: Nonvolatile disk caches in the storage hierarchy of mainstream computer systems", journal = j-TOS, volume = "4", number = "2", pages = "4:1--4:??", month = may, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1367829.1367830", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:51 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Hard-disk drives are a significant bottleneck to system performance and are also responsible for a significant fraction of total system power consumption. Intel Turbo Memory addresses these problems by adding a new layer to the storage hierarchy: a platform-based and nonvolatile, disk cache. In this article, we describe the hardware and software elements of the Intel Turbo Memory architecture. We show how it supports the new ReadyBoost and ReadyDrive features in Microsoft Vista and describe its key caching algorithms. We present performance, power savings, and wear-leveling results achieved by Intel Turbo Memory.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "disk cache; NAND; nonvolatile memory; solid-state disk; write-back", } @Article{Traeger:2008:NYS, author = "Avishay Traeger and Erez Zadok and Nikolai Joukov and Charles P. Wright", title = "A nine year study of file system and storage benchmarking", journal = j-TOS, volume = "4", number = "2", pages = "5:1--5:??", month = may, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1367829.1367831", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jun 16 17:36:51 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Benchmarking is critical when evaluating performance, but is especially difficult for file and storage systems. Complex interactions between I/O devices, caches, kernel daemons, and other OS components result in behavior that is rather difficult to analyze. Moreover, systems have different features and optimizations, so no single benchmark is always suitable. The large variety of workloads that these systems experience in the real world also adds to this difficulty.\par In this article we survey 415 file system and storage benchmarks from 106 recent papers. We found that most popular benchmarks are flawed and many research papers do not provide a clear indication of true performance. We provide guidelines that we hope will improve future performance evaluations. To show how some widely used benchmarks can conceal or overemphasize overheads, we conducted a set of experiments. As a specific example, slowing down read operations on ext2 by a factor of 32 resulted in only a 2--5\% wall-clock slowdown in a popular compile benchmark. Finally, we discuss future work to improve file system and storage benchmarking.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "benchmarks; file systems; storage systems", } @Article{Baker:2008:ISI, author = "Mary Baker", title = "Introduction to special issue of {USENIX FAST 2008}", journal = j-TOS, volume = "4", number = "3", pages = "6:1--6:??", month = nov, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1416944.1416945", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:07 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Jiang:2008:DDC, author = "Weihang Jiang and Chongfeng Hu and Yuanyuan Zhou and Arkady Kanevsky", title = "Are disks the dominant contributor for storage failures?: a comprehensive study of storage subsystem failure characteristics", journal = j-TOS, volume = "4", number = "3", pages = "7:1--7:??", month = nov, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1416944.1416946", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:07 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Building reliable storage systems becomes increasingly challenging as the complexity of modern storage systems continues to grow. Understanding storage failure characteristics is crucially important for designing and building a reliable storage system. 
While several recent studies have been conducted on understanding storage failures, almost all of them focus on the failure characteristics of one component --- disks --- and do not study other storage component failures.\par This article analyzes the failure characteristics of storage subsystems. More specifically, we analyzed the storage logs collected from about 39,000 storage systems commercially deployed at various customer sites. The dataset covers a period of 44 months and includes about 1,800,000 disks hosted in about 155,000 storage-shelf enclosures. Our study reveals many interesting findings, providing useful guidelines for designing reliable storage systems. Some of our major findings include: (1) In addition to disk failures that contribute to 20--55\% of storage subsystem failures, other components such as physical interconnects and protocol stacks also account for a significant percentage of storage subsystem failures. (2) Each individual storage subsystem failure type, and storage subsystem failure as a whole, exhibits strong self-correlations. In addition, these failures exhibit ``bursty'' patterns. (3) Storage subsystems configured with redundant interconnects experience 30--40\% lower failure rates than those with a single interconnect. (4) Spanning disks of a RAID group across multiple shelves provides a more resilient solution for storage subsystems than within a single shelf.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "disk failures; failure characteristics; storage subsystem; Storage system", } @Article{Bairavasundaram:2008:ADC, author = "Lakshmi N. Bairavasundaram and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau and Garth R. 
Goodson and Bianca Schroeder", title = "An analysis of data corruption in the storage stack", journal = j-TOS, volume = "4", number = "3", pages = "8:1--8:??", month = nov, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1416944.1416947", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:07 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "An important threat to reliable storage of data is silent data corruption. In order to develop suitable protection mechanisms against data corruption, it is essential to understand its characteristics. In this article, we present the first large-scale study of data corruption. We analyze corruption instances recorded in production storage systems containing a total of 1.53 million disk drives, over a period of 41 months. We study three classes of corruption: checksum mismatches, identity discrepancies, and parity inconsistencies. We focus on checksum mismatches since they occur the most.\par We find more than 400,000 instances of checksum mismatches over the 41-month period. We find many interesting trends among these instances, including: (i) nearline disks (and their adapters) develop checksum mismatches an order of magnitude more often than enterprise-class disk drives, (ii) checksum mismatches within the same disk are not independent events and they show high spatial and temporal locality, and (iii) checksum mismatches across different disks in the same storage system are not independent. We use our observations to derive lessons for corruption-proof system design.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "Data corruption; disk drive reliability", } @Article{Tsafrir:2008:PSF, author = "Dan Tsafrir and Tomer Hertz and David Wagner and Dilma {Da Silva}", title = "Portably solving file races with hardness amplification", journal = j-TOS, volume = "4", number = "3", pages = "9:1--9:??", month = nov, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1416944.1416948", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:07 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The file-system API of contemporary systems makes programs vulnerable to TOCTTOU (time-of-check-to-time-of-use) race conditions. Existing solutions either help users to detect these problems (by pinpointing their locations in the code), or prevent the problem altogether (by modifying the kernel or its API). But the latter alternative is not prevalent, and the former is just the first step: Programmers must still address TOCTTOU flaws within the limits of the existing API with which several important tasks cannot be accomplished in a portable straightforward manner. Recently, Dean and Hu [2004] addressed this problem and suggested a probabilistic hardness amplification approach that alleviated the matter. Alas, shortly after, Borisov et al. [2005] responded with an attack termed ``filesystem maze'' that defeated the new approach.\par We begin by noting that mazes constitute a generic way to deterministically win many TOCTTOU races (gone are the days when the probability was small). 
In the face of this threat, we: (1) develop a new user-level defense that can withstand mazes; and (2) show that our method is undefeated even by much stronger hypothetical attacks that provide the adversary program with ideal conditions to win the race (enjoying complete and instantaneous knowledge about the defending program's actions and being able to perfectly synchronize accordingly). The fact that our approach is immune to these unrealistic attacks suggests it can be used as a simple and portable solution to a large class of TOCTTOU vulnerabilities, without requiring modifications to the underlying operating system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "Race conditions; time-of-check-to-time-of-use; TOCTTOU", } @Article{Narayanan:2008:WLP, author = "Dushyanth Narayanan and Austin Donnelly and Antony Rowstron", title = "Write off-loading: Practical power management for enterprise storage", journal = j-TOS, volume = "4", number = "3", pages = "10:1--10:??", month = nov, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1416944.1416949", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:07 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "In enterprise data centers power usage is a problem impacting server density and the total cost of ownership. Storage uses a significant fraction of the power budget and there are no widely deployed power-saving solutions for enterprise storage systems. The traditional view is that enterprise workloads make spinning disks down ineffective because idle periods are too short. 
We analyzed block-level traces from 36 volumes in an enterprise data center for one week and concluded that significant idle periods exist, and that they can be further increased by modifying the read/write patterns using {\em write off-loading}. Write off-loading allows write requests on spun-down disks to be temporarily redirected to persistent storage elsewhere in the data center.\par The key challenge is doing this transparently and efficiently at the block level, without sacrificing consistency or failure resilience. We describe our write off-loading design and implementation that achieves these goals. We evaluate it by replaying portions of our traces on a rack-based testbed. Results show that just spinning disks down when idle saves 28--36\% of energy, and write off-loading further increases the savings to 45--60\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "disk spin-down; DiskEnergy; energy; enterprise storage; power; write off-loading", } @Article{MacCormick:2009:KNA, author = "John MacCormick and Nicholas Murphy and Venugopalan Ramasubramanian and Udi Wieder and Junfeng Yang and Lidong Zhou", title = "Kinesis: a new approach to replica placement in distributed storage systems", journal = j-TOS, volume = "4", number = "4", pages = "11:1--11:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1480439.1480440", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:20 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Kinesis is a novel data placement model for distributed storage systems. 
It exemplifies three design principles: {\em structure\/} (division of servers into a few failure-isolated segments), {\em freedom of choice\/} (freedom to allocate the best servers to store and retrieve data based on current resource availability), and {\em scattered distribution\/} (independent, pseudo-random spread of replicas in the system). These design principles enable storage systems to achieve balanced utilization of storage and network resources in the presence of incremental system expansions, failures of single and shared components, and skewed distributions of data size and popularity. In turn, this ability leads to significantly reduced resource provisioning costs, good user-perceived response times, and fast, parallelized recovery from independent and correlated failures.\par This article validates Kinesis through theoretical analysis, simulations, and experiments on a prototype implementation. Evaluations driven by real-world traces show that Kinesis can significantly outperform the widely used Chain replica-placement strategy in terms of resource requirements, end-to-end delay, and failure recovery.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "load balancing; multiple-choice paradigm; Storage system", } @Article{Huang:2009:QSS, author = "Chih-Yuan Huang and Tei-Wei Kuo and Ai-Chun Pang", title = "{QoS} for storage subsystems using {IEEE-1394}", journal = j-TOS, volume = "4", number = "4", pages = "12:1--12:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1480439.1480441", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:20 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "IEEE-1394 is widely adopted in various commercial products for computing, communication, and entertainment. Although many services with Quality-of-Service (QoS) supports are now available in systems over IEEE-1394, little work is done for QoS-based resource allocation. In this article, we aim at the design of a bandwidth reservation mechanism and its policy for isochronous requests, such as those from cameras. We then address the QoS support issue for asynchronous requests, such as those from disks, and an analytic framework for probability-based QoS guarantees. This work is concluded by the proposing of a topology configuration algorithm for IEEE-1394 devices. The capability of the proposed methodology and the analytic framework are evaluated by a series of experiments over a Linux-based system prototype.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "I/O subsystem; IEEE-1394; quality-of-service; real time", } @Article{Anastasiadis:2009:RFA, author = "Stergios V. Anastasiadis and Rajiv G. Wickremesinghe and Jeffrey S. 
Chase", title = "Rethinking {FTP}: Aggressive block reordering for large file transfers", journal = j-TOS, volume = "4", number = "4", pages = "13:1--13:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1480439.1480442", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:20 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Whole-file transfer is a basic primitive for Internet content dissemination. Content servers are increasingly limited by disk arm movement, given the rapid growth in disk density, disk transfer rates, server network bandwidth, and content size. Individual file transfers are sequential, but the block access sequence on a content server is effectively random when many slow clients access large files concurrently. Although larger blocks can help improve disk throughput, buffering requirements increase linearly with block size.\par This article explores a novel block reordering technique that can reduce server disk traffic significantly when large content files are shared. The idea is to transfer blocks to each client in any order that is convenient for the server. The server sends blocks to each client opportunistically in order to maximize the advantage from the disk reads it issues to serve other clients accessing the same file. We first illustrate the motivation and potential impact of aggressive block reordering using simple analytical models. Then we describe a file transfer system using a simple block reordering algorithm, called Circus. Experimental results with the Circus prototype show that it can improve server throughput by a factor of two or more in workloads with strong file access locality.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "Disk access; file transfer protocols; scheduling", } @Article{Choi:2009:JFT, author = "Hyun Jin Choi and Seung-Ho Lim and Kyu Ho Park", title = "{JFTL}: a flash translation layer based on a journal remapping for flash memory", journal = j-TOS, volume = "4", number = "4", pages = "14:1--14:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1480439.1480443", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:20 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "In flash memory-based storage, a Flash Translation Layer (FTL) manages the mapping between the logical addresses of a file system and the physical addresses of the flash memory. When a journaling file system is set up on the FTL, the consistency of the file system is guaranteed by duplications of the same file system changes in both the journal region of the file system and the home locations of the changes. However, these duplications inevitably degrade the performance of the file system. In this article we present an efficient FTL, called {\em JFTL}, based on a journal remapping technique. The FTL uses an address mapping method to write all the data to a new region in a process known as an out-of-place update. Because of this process, the existing data in flash memory is not overwritten by such an update. By using this characteristic of the FTL, the JFTL remaps addresses of the logged file system changes to addresses of the home locations of the changes, instead of writing the changes once more to flash memory. Thus, the JFTL efficiently eliminates redundant data in the flash memory as well as preserving the consistency of the journaling file system. 
Our experiments confirm that, when associated with a writeback or ordered mode of a conventional EXT3 file system, the JFTL enhances the performance of EXT3 by up to 20\%. Furthermore, when the JFTL operates with a journaled mode of EXT3, there is almost a twofold performance gain in many cases. Moreover, the recovery performance of the JFTL is much better than that of the FTL.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "Flash memory; flash translation layer; garbage detection; journal remapping; journaling file system", } @Article{Li:2009:GCS, author = "Mingqiang Li and Jiwu Shu and Weimin Zheng", title = "{GRID} codes: Strip-based erasure codes with high fault tolerance for storage systems", journal = j-TOS, volume = "4", number = "4", pages = "15:1--15:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1480439.1480444", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:20 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "As storage systems grow in size and complexity, they are increasingly confronted with concurrent disk failures together with multiple unrecoverable sector errors. To ensure high data reliability and availability, erasure codes with high fault tolerance are required. In this article, we present a new family of erasure codes with high fault tolerance, named GRID codes. They are called such because they are a family of {\em strip-based codes\/} whose strips are arranged into multi-dimensional grids. In the construction of GRID codes, we first introduce a concept of {\em matched codes\/} and then discuss how to use matched codes to construct GRID codes. In addition, we propose an iterative reconstruction algorithm for GRID codes. 
We also discuss some important features of GRID codes. Finally, we compare GRID codes with several categories of existing codes. Our comparisons show that for large-scale storage systems, our GRID codes have attractive advantages over many existing erasure codes: (a) They are completely XOR-based and have very regular structures, ensuring easy implementation; (b) they can provide up to 15 and even higher fault tolerance; and (c) their storage efficiency can reach up to 80\% and even higher. All the advantages make GRID codes more suitable for large-scale storage systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "Disk failure; erasure code; fault tolerance; storage system; unrecoverable sector error", } @Article{Bahn:2009:PPS, author = "Hyokyung Bahn and Soyoon Lee and Sam H. Noh", title = "{P\slash PA-SPTF}: Parallelism-aware request scheduling algorithms for {MEMS}-based storage devices", journal = j-TOS, volume = "5", number = "1", pages = "1:1--1:??", month = mar, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1502777.1502778", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:38 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "MEMS-based storage is foreseen as a promising storage media that provides high-bandwidth, low-power consumption, high-density, and low cost. Due to these versatile features, MEMS storage is anticipated to be used for a wide range of applications from storage for small handheld devices to high capacity mass storage servers. However, MEMS storage has vastly different physical characteristics compared to a traditional disk. First, MEMS storage has thousands of heads that can be activated simultaneously. 
Second, the media of MEMS storage is a square structure which is different from the platter structure of disks. This article presents a new request scheduling algorithm for MEMS storage called P-SPTF that makes use of the aforementioned characteristics. P-SPTF considers the parallelism of MEMS storage as well as the seek time of requests on the two dimensional square structure. We then present another algorithm called PA-SPTF that considers the aging factor so that starvation resistance is improved. Simulation studies show that PA-SPTF improves the performance of MEMS storage by up to 39.2\% in terms of the average response time and 62.4\% in terms of starvation resistance compared to the widely acknowledged SPTF algorithm. We also show that there exists a spectrum of scheduling algorithms that subsumes both the P-SPTF and PA-SPTF algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "MEMS-based storage; parallelism; scheduling; seek time; starvation", } @Article{Ma:2009:NAS, author = "Di Ma and Gene Tsudik", title = "A new approach to secure logging", journal = j-TOS, volume = "5", number = "1", pages = "2:1--2:??", month = mar, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1502777.1502779", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:38 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The need for secure logging is well-understood by the security professionals, including both researchers and practitioners. The ability to efficiently verify all (or some) log entries is important to any application employing secure logging techniques. 
In this article, we begin by examining the state of the art in secure logging and identify some problems inherent to systems based on trusted third-party servers. We then propose a different approach to secure logging based upon recently developed Forward-Secure Sequential Aggregate (FssAgg) authentication techniques. Our approach offers both space-efficiency and provable security. We illustrate two concrete schemes --- one private-verifiable and one public-verifiable --- that offer practical secure logging without any reliance on online trusted third parties or secure hardware. We also investigate the concept of immutability in the context of forward-secure sequential aggregate authentication to provide finer grained verification. Finally we evaluate proposed schemes and report on our experience with implementing them within a secure logging system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "forward secure sequential aggregate (FssAgg) authentication; forward-secure stream integrity; MACs; Secure logging; signatures; truncation attack", } @Article{Garrison:2009:UFS, author = "John A. Garrison and A. L. Narasimha Reddy", title = "{Umbrella File System}: Storage management across heterogeneous devices", journal = j-TOS, volume = "5", number = "1", pages = "3:1--3:??", month = mar, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1502777.1502780", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:38 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "With the advent of and recent developments in Flash storage, device characteristic diversity is becoming both more prevalent and more distinct. 
In this article, we describe the Umbrella File System (UmbrellaFS), a stackable file system designed to provide flexibility in matching diversity of file access characteristics to diversity of device characteristics through a user or system administrator specified policy. We present the design and results from a prototype implementation of UmbrellaFS on both Linux 2.4 and 2.6. The results show that UmbrellaFS has little overhead for most file system operations while providing an ability better to utilize the differences in Flash and traditional hard drives. With appropriate use of rules, we have shown improvements of up to 44\% in certain situations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "Device characteristics; flash drives; namespaces; policy-driven storage", } @Article{Mi:2009:EMI, author = "Ningfang Mi and Alma Riska and Qi Zhang and Evgenia Smirni and Erik Riedel", title = "Efficient management of idleness in storage systems", journal = j-TOS, volume = "5", number = "2", pages = "4:1--4:??", month = jun, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1534912.1534913", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:46 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Various activities that intend to enhance performance, reliability, and availability of storage systems are scheduled with low priority and served during idle times. Under such conditions, idleness becomes a valuable ``resource'' that needs to be efficiently managed. 
A common approach in system design is to be nonwork conserving by ``idle waiting'', that is, delay the scheduling of background jobs to avoid slowing down upcoming foreground tasks.\par In this article, we complement ``idle waiting'' with the ``estimation'' of background work to be served in every idle interval to effectively manage the trade-off between the performance of foreground and background tasks. As a result, the storage system is better utilized without compromising foreground performance. Our analysis shows that if idle times have low variability, then idle waiting is not necessary. Only if idle times are highly variable does idle waiting become necessary to minimize the impact of background activity on foreground performance. We further show that if there is burstiness in idle intervals, then it is possible to predict accurately the length of incoming idle intervals and use this information to serve more background jobs without affecting foreground performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "background jobs; continuous data histogram; foreground jobs; idle periods; idleness; low priority work; performance guarantee; resource management; storage systems", } @Article{Storer:2009:PSR, author = "Mark W. Storer and Kevin M. Greenan and Ethan L. 
Miller and Kaladhar Voruganti", title = "{POTSHARDS} --- a secure, recoverable, long-term archival storage system", journal = j-TOS, volume = "5", number = "2", pages = "5:1--5:??", month = jun, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1534912.1534914", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:46 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Users are storing ever-increasing amounts of information digitally, driven by many factors including government regulations and the public's desire to digitally record their personal histories. Unfortunately, many of the security mechanisms that modern systems rely upon, such as encryption, are poorly suited for storing data for indefinitely long periods of time; it is very difficult to manage keys and update cryptosystems to provide secrecy through encryption over periods of decades. Worse, an adversary who can compromise an archive need only wait for cryptanalysis techniques to catch up to the encryption algorithm used at the time of the compromise in order to obtain ``secure'' data. To address these concerns, we have developed POTSHARDS, an archival storage system that provides long-term security for data with very long lifetimes without using encryption. Secrecy is achieved by using unconditionally secure secret splitting and spreading the resulting shares across separately managed archives. Providing availability and data recovery in such a system can be difficult; thus, we use a new technique, approximate pointers, in conjunction with secure distributed RAID techniques to provide availability and reliability across independent archives. To validate our design, we developed a prototype POTSHARDS implementation. 
In addition to providing us with an experimental testbed, this prototype helped us to understand the design issues that must be addressed in order to maximize security.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "approximate pointers; Archival storage; secret splitting", } @Article{Bhadkamkar:2009:SSS, author = "Medha Bhadkamkar and Fernando Farfan and Vagelis Hristidis and Raju Rangaswami", title = "Storing semi-structured data on disk drives", journal = j-TOS, volume = "5", number = "2", pages = "6:1--6:??", month = jun, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1534912.1534915", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:46 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Applications that manage semi-structured data are becoming increasingly commonplace. Current approaches for storing semi-structured data use existing storage machinery; they either map the data to relational databases, or use a combination of flat files and indexes. While employing these existing storage mechanisms provides readily available solutions, there is a need to more closely examine their suitability to this class of data. Particularly, retrofitting existing solutions for semi-structured data can result in a mismatch between the tree structure of the data and the access characteristics of the underlying storage device (disk drive). This study explores various possibilities in the design space of native storage solutions for semi-structured data by exploring alternative approaches that match application data access characteristics to those of the underlying disk drive. 
For evaluating the effectiveness of the proposed native techniques in relation to the existing solution, we experiment with XML data using the XPathMark benchmark. Extensive evaluation reveals the strengths and weaknesses of the proposed native data layout techniques. While the existing solutions work really well for {\em deep-focused\/} queries into a semi-structured document (those that result in retrieving entire subtrees), the proposed native solutions substantially outperform for the {\em non-deep-focused\/} queries, which we demonstrate are at least as important as the deep-focused. We believe that native data layout techniques offer a unique direction for improving the performance of semi-structured data stores for a variety of important workloads. However, given that the proposed native techniques require circumventing current storage stack abstractions, further investigation is warranted before they can be applied to general-purpose storage systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "Semi-structured data; storage management; XML", } @Article{Thomasian:2009:HRR, author = "Alexander Thomasian and Mario Blaum", title = "Higher reliability redundant disk arrays: Organization, operation, and coding", journal = j-TOS, volume = "5", number = "3", pages = "7:1--7:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1629075.1629076", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:57 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Parity is a popular form of data protection in {\em redundant arrays of inexpensive/independent disks (RAID)}. 
RAID5 dedicates one out of {\em N\/} disks to parity to mask single disk failures, that is, the contents of a block on a failed disk can be reconstructed by exclusive-ORing the corresponding blocks on surviving disks. RAID5 can mask a single disk failure, and it is vulnerable to data loss if a second disk failure occurs. The RAID5 rebuild process systematically reconstructs the contents of a failed disk on a spare disk, returning the system to its original state, but the rebuild process may be unsuccessful due to unreadable sectors. This has led to {\em two disk failure tolerant arrays (2DFTs)}, such as RAID6 based on Reed--Solomon (RS) codes. EVENODD, RDP (Row-Diagonal-Parity), the X-code, and RM2 (Row-Matrix) are 2DFTs with parity coding. RM2 incurs a higher level of redundancy than two disks, while the X-code is limited to a prime number of disks. RDP is optimal with respect to the number of XOR operations at the encoding, but not for short write operations. For small symbol sizes EVENODD and RDP have the same disk access pattern as RAID6, while RM2 and the X-code incur a high recovery cost with two failed disks. We describe variations to RAID5 and RAID6 organizations, including clustered RAID, different methods to update parities, rebuild processing, disk scrubbing to eliminate sector errors, and the {\em intra-disk redundancy (IDR)\/} method to deal with sector errors. We summarize the results of recent studies of failures in hard disk drives. We describe Markov chain reliability models to estimate RAID {\em mean time to data loss (MTTDL)\/} taking into account sector errors and the effect of disk scrubbing. Numerical results show that RAID5 plus IDR attains the same MTTDL level as RAID6, while incurring a lower performance penalty. We conclude with a survey of analytic and simulation studies of RAID performance and tools and benchmarks for RAID performance evaluation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "Disk array; disk failure studies; performance evaluation; RAID; reliability evaluation", } @Article{Tosun:2009:DCS, author = "Ali {\c{S}}aman Tosun", title = "Divide-and-conquer scheme for strictly optimal retrieval of range queries", journal = j-TOS, volume = "5", number = "3", pages = "8:1--8:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1629075.1629077", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:57 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Declustering distributes data among parallel disks to reduce retrieval cost using I/O parallelism. Many schemes were proposed for single copy declustering of spatial data. Recently, declustering using replication gained a lot of interest and several schemes with different properties were proposed. It is computationally expensive to verify optimality of replication schemes designed for range queries and existing schemes verify optimality for up to 50 disks. In this article, we propose a novel method to find replicated declustering schemes that render all spatial range queries optimal. The proposed scheme uses threshold based declustering, divisibility of large queries for optimization and optimistic approach to compute maximum flow. The proposed scheme is generic and works for any number of dimensions. Experimental results show that using 3 copies there exist allocations that render all spatial range queries optimal for up to 750 disks in 2 dimensions and with the exception of several values for up to 100 disks in 3 dimensions. 
The proposed scheme improves search for strictly optimal replicated declustering schemes significantly and will be a valuable tool to answer open problems on replicated declustering.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "Declustering; number theory; parallel I/O; replication; spatial range query; threshold", } @Article{Qin:2009:DLB, author = "Xiao Qin and Hong Jiang and Adam Manzanares and Xiaojun Ruan and Shu Yin", title = "Dynamic load balancing for {I/O}-intensive applications on clusters", journal = j-TOS, volume = "5", number = "3", pages = "9:1--9:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1629075.1629078", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:57 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Load balancing for clusters has been investigated extensively, mainly focusing on the effective usage of global CPU and memory resources. However, previous CPU- or memory-centric load balancing schemes suffer significant performance drop under I/O-intensive workloads due to the imbalance of I/O load. To solve this problem, we propose two simple yet effective I/O-aware load-balancing schemes for two types of clusters: (1) homogeneous clusters where nodes are identical and (2) heterogeneous clusters, which are comprised of a variety of nodes with different performance characteristics in computing power, memory capacity, and disk speed. In addition to assigning I/O-intensive sequential and parallel jobs to nodes with light I/O loads, the proposed schemes judiciously take into account both CPU and memory load sharing in the system. Therefore, our schemes are able to maintain high performance for a wide spectrum of workloads.
We develop analytic models to study mean slowdowns, task arrival, and transfer processes in system levels. Using a set of real I/O-intensive parallel applications and synthetic parallel jobs with various I/O characteristics, we show that our proposed schemes consistently improve the performance over existing non-I/O-aware load-balancing schemes, including CPU- and Memory-aware schemes and a PBS-like batch scheduler for parallel and sequential jobs, for a diverse set of workload conditions. Importantly, this performance improvement becomes much more pronounced when the applications are I/O-intensive. For example, the proposed approaches deliver 23.6--88.0 \% performance improvements for I/O-intensive applications such as LU decomposition, Sparse Cholesky, Titan, Parallel text searching, and Data Mining. When I/O load is low or well balanced, the proposed schemes are capable of maintaining the same level of performance as the existing non-I/O-aware schemes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "clusters; heterogeneity; I/O-intensive applications; Load balancing; storage systems", } @Article{Xie:2009:FAS, author = "Tao Xie and Yao Sun", title = "A file assignment strategy independent of workload characteristic assumptions", journal = j-TOS, volume = "5", number = "3", pages = "10:1--10:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1629075.1629079", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:33:57 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The problem of statically assigning nonpartitioned files in a parallel I/O system has been extensively investigated. 
A basic workload characteristic assumption of most existing solutions to the problem is that there exists a strong inverse correlation between file access frequency and file size. In other words, the most popular files are typically small in size, while the large files are relatively unpopular. Recent studies on the characteristics of Web proxy traces suggested, however, the correlation, if any, is so weak that it can be ignored. Hence, the following two questions arise naturally. First, can existing algorithms still perform well when the workload assumption does not hold? Second, if not, can one develop a new file assignment strategy that is immune to the workload assumption? To answer these questions, we first evaluate the performance of three well-known file assignment algorithms with and without the workload assumption, respectively. Next, we develop a novel static nonpartitioned file assignment strategy for parallel I/O systems, called static round-robin (SOR), which is immune to the workload assumption. Comprehensive experimental results show that SOR consistently improves the performance in terms of mean response time over the existing schemes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "File assignment; load balancing; parallel I/O; workload characteristics; Zipfian distribution", } @Article{Seltzer:2009:ISI, author = "Margo Seltzer and Ric Wheeler", title = "Introduction to special issue {FAST 2009}", journal = j-TOS, volume = "5", number = "4", pages = "11:1--11:??", month = dec, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1629080.1629081", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:34:12 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hasan:2009:PHF, author = "Ragib Hasan and Radu Sion and Marianne Winslett", title = "Preventing history forgery with secure provenance", journal = j-TOS, volume = "5", number = "4", pages = "12:1--12:??", month = dec, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1629080.1629082", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:34:12 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "As increasing amounts of valuable information are produced and persist digitally, the ability to determine the origin of data becomes important. In science, medicine, commerce, and government, data provenance tracking is essential for rights protection, regulatory compliance, management of intelligence and medical data, and authentication of information as it flows through workplace tasks. 
While significant research has been conducted in this area, the associated security and privacy issues have not been explored, leaving provenance information vulnerable to illicit alteration as it passes through untrusted environments.\par In this article, we show how to provide strong integrity and confidentiality assurances for data provenance information at the kernel, file system, or application layer. We describe Sprov, our provenance-aware system prototype that implements provenance tracking of data writes at the application layer, which makes Sprov extremely easy to deploy. We present empirical results that show that, for real-life workloads, the runtime overhead of Sprov for recording provenance with confidentiality and integrity guarantees ranges from 1\% to 13\%, when all file modifications are recorded, and from 12\% to 16\%, when all file read and modifications are tracked.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "audit; confidentiality; integrity; lineage; provenance; security", } @Article{Muniswamy-Reddy:2009:CBV, author = "Kiran-Kumar Muniswamy-Reddy and David A. Holland", title = "Causality-based versioning", journal = j-TOS, volume = "5", number = "4", pages = "13:1--13:??", month = dec, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1629080.1629083", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:34:12 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Versioning file systems provide the ability to recover from a variety of failures, including file corruption, virus and worm infestations, and user mistakes. However, using versions to recover from data-corrupting events requires a human to determine precisely which files and versions to restore. 
We can create more meaningful versions and enhance the value of those versions by capturing the causal connections among files, facilitating selection and recovery of precisely the right versions after data-corrupting events.\par We determine when to create new versions of files automatically using the causal relationships among files. The literature on versioning file systems usually examines two extremes of possible version-creation algorithms: open-to-close versioning and versioning on every write. We evaluate causal versions of these two algorithms and introduce two additional causality-based algorithms: Cycle-Avoidance and Graph-Finesse.\par We show that capturing and maintaining causal relationships imposes less than 7\% overhead on a versioning system, providing benefit at low cost. We then show that Cycle-Avoidance provides more meaningful versions of files created during concurrent program execution, with overhead comparable to open/close versioning. Graph-Finesse provides even greater control, frequently at comparable overhead, but sometimes at unacceptable overhead. Versioning on every write is an interesting extreme case, but is far too costly to be useful in practice.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "causality; data provenance", } @Article{Vrable:2009:CFB, author = "Michael Vrable and Stefan Savage and Geoffrey M.
Voelker", title = "{Cumulus}: Filesystem backup to the cloud", journal = j-TOS, volume = "5", number = "4", pages = "14:1--14:??", month = dec, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1629080.1629084", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:34:12 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Cumulus is a system for efficiently implementing filesystem backups over the Internet, specifically designed under a {\em thin cloud\/} assumption --- that the remote datacenter storing the backups does not provide any special backup services, but only a least-common-denominator storage interface. Cumulus aggregates data from small files for storage and uses LFS-inspired segment cleaning to maintain storage efficiency. While Cumulus can use virtually any storage service, we show its efficiency is comparable to integrated approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "Backup; cloud storage", } @Article{Batsakis:2009:CNC, author = "Alexandros Batsakis and Randal Burns and Arkady Kanevsky and James Lentini and Thomas Talpey", title = "{CA-NFS}: a congestion-aware network file system", journal = j-TOS, volume = "5", number = "4", pages = "15:1--15:??", month = dec, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1629080.1629085", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:34:12 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "We develop a holistic framework for adaptively scheduling asynchronous requests in distributed file systems. 
The system is holistic in that it manages all resources, including network bandwidth, server I/O, server CPU, and client and server memory utilization. It accelerates, defers, or cancels asynchronous requests in order to improve application-perceived performance directly. We employ congestion pricing via online auctions to coordinate the use of system resources by the file system clients so that they can detect shortages and adapt their resource usage. We implement our modifications in the Congestion-Aware Network File System (CA-NFS), an extension to the ubiquitous network file system (NFS). Our experimental result shows that CA-NFS results in a 20\% improvement in execution times when compared with NFS for a variety of workloads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "congestion; File systems; NFS; performance; scalability", } @Article{Agrawal:2009:GRI, author = "Nitin Agrawal and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau", title = "Generating realistic {{\em Impressions\/}} for file-system benchmarking", journal = j-TOS, volume = "5", number = "4", pages = "16:1--16:??", month = dec, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1629080.1629086", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 16 15:34:12 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The performance of file systems and related software depends on characteristics of the underlying file-system image (i.e., file-system metadata and file contents). Unfortunately, rather than benchmarking with realistic file-system images, most system designers and evaluators rely on {\em ad hoc\/} assumptions and (often inaccurate) rules of thumb. 
Furthermore, the lack of standardization and reproducibility makes file-system benchmarking ineffective. To remedy these problems, we develop Impressions, a framework to generate statistically accurate file-system images with realistic metadata and content. Impressions is flexible, supporting user-specified constraints on various file-system parameters using a number of statistical techniques to generate consistent images. In this article, we present the design, implementation, and evaluation of Impressions and demonstrate its utility using desktop search as a case study. We believe Impressions will prove to be useful to system developers and users alike.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "16", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "File and storage system benchmarking", } @Article{Khatib:2010:OMB, author = "Mohammed G. Khatib and Pieter H. Hartel", title = "Optimizing {MEMS}-based storage devices for mobile battery-powered systems", journal = j-TOS, volume = "6", number = "1", pages = "1:1--1:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1714454.1714455", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Aug 14 17:04:28 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "An emerging storage technology, called MEMS-based storage, promises nonvolatile storage devices with ultrahigh density, high rigidity, a small form factor, and low cost. For these reasons, MEMS-based storage devices are suitable for battery-powered mobile systems such as PDAs. For deployment in such systems, MEMS-based storage devices must consume little energy. 
This work mainly targets reducing the energy consumption of this class of devices.\par We derive the operation modes of a MEMS-based storage device and systemically devise a policy in each mode for energy saving. Three types of policies are presented: power management, shutdown, and data-layout policy. Combined, these policies reduce the total energy consumed by a MEMS-based storage device. A MEMS-based storage device that enforces these policies comes close to Flash with respect to energy consumption and response time. However, enhancement on the device level is still needed; we present some suggestions to resolve this issue.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "design space; energy efficiency; green storage; mobile systems; Probe storage", } @Article{Yu:2010:NVS, author = "Young Jin Yu and Dong In Shin and Hyeonsang Eom and Heon Young Yeom", title = "{NCQ} vs. {I/O} scheduler: Preventing unexpected misbehaviors", journal = j-TOS, volume = "6", number = "1", pages = "2:1--2:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1714454.1714456", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Aug 14 17:04:28 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Native Command Queueing (NCQ) is an optimization technology to maximize throughput by reordering requests inside a disk drive. It has been so successful that NCQ has become the standard in SATA 2 protocol specification, and the great majority of disk vendors have adopted it for their recent disks. However, there is a possibility that the technology may lead to an information gap between the OS and a disk drive. 
An NCQ-enabled disk tries to optimize throughput without realizing the intention of an OS, whereas the OS does its best under the assumption that the disk will do as it is told without specific knowledge regarding the details of the disk mechanism. Let us call this {\em expectation discord\/}, which may cause serious problems such as request starvations or performance anomaly. In this article, we (1) confirm that {\em expectation discord\/} actually occurs in real systems; (2) propose software-level approaches to solve them; and (3) evaluate our mechanism. Experimental results show that our solution is simple, cheap (no special hardware required), portable, and effective.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "hybrid scheduling; I/O prioritization; NCQ; SATA 2; starvation detection", } @Article{Jung:2010:FES, author = "Jaemin Jung and Youjip Won and Eunki Kim and Hyungjong Shin and Byeonggil Jeon", title = "{FRASH}: Exploiting storage class memory in hybrid file system for hierarchical storage", journal = j-TOS, volume = "6", number = "1", pages = "3:1--3:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1714454.1714457", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Aug 14 17:04:28 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "In this work, we develop a novel hybrid file system, FRASH, for storage-class memory and NAND Flash. Despite the promising physical characteristics of storage-class memory, its scale is an order of magnitude smaller than the current storage device scale. This fact makes it less than desirable for use as an independent storage device.
We carefully analyze in-memory and on-disk file system objects in a log-structured file system, and exploit memory and storage aspects of the storage-class memory to overcome the drawbacks of the current log-structured file system. FRASH provides a hybrid view of storage-class memory. It harbors an in-memory data structure as well as an on-disk structure. It provides nonvolatility to key data structures which have been maintained in-memory in a legacy log-structured file system. This approach greatly improves the mount latency and effectively resolves the robustness issue. By maintaining on-disk structure in storage-class memory, FRASH provides byte-addressability to the file system object and metadata for page, and subsequently greatly improves the I/O performance compared to the legacy log-structured approach. While storage-class memory offers byte granularity, it is still far slower than its DRAM counterpart. We develop a copy-on-mount technique to overcome the access latency difference between main memory and storage-class memory. Our file system was able to reduce the mount time by 92\% and file system I/O performance was increased by 16\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "Flash storage; log-structured file system", } @Article{Balakrishnan:2010:DRR, author = "Mahesh Balakrishnan and Asim Kadav and Vijayan Prabhakaran and Dahlia Malkhi", title = "Differential {RAID}: Rethinking {RAID} for {SSD} reliability", journal = j-TOS, volume = "6", number = "2", pages = "4:1--4:??", month = jul, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1807060.1807061", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Aug 14 17:04:39 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "SSDs exhibit very different failure characteristics compared to hard drives. In particular, the bit error rate (BER) of an SSD climbs as it receives more writes. As a result, RAID arrays composed from SSDs are subject to correlated failures. By balancing writes evenly across the array, RAID schemes can wear out devices at similar times. When a device in the array fails towards the end of its lifetime, the high BER of the remaining devices can result in data loss. We propose Diff-RAID, a parity-based redundancy solution that creates an age differential in an array of SSDs. Diff-RAID distributes parity blocks unevenly across the array, leveraging their higher update rate to age devices at different rates. To maintain this age differential when old devices are replaced by new ones, Diff-RAID reshuffles the parity distribution on each drive replacement. We evaluate Diff-RAID's reliability by using real BER data from 12 flash chips on a simulator and show that it is more reliable than RAID-5, in some cases by multiple orders of magnitude. 
We also evaluate Diff-RAID's performance using a software implementation on a 5-device array of 80 GB Intel X25-M SSDs and show that it offers a trade-off between throughput and reliability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "flash; RAID; SSD", } @Article{Chang:2010:SEN, author = "Yuan-Hao Chang and Jian-Hong Lin and Jen-Wei Hsieh and Tei-Wei Kuo", title = "A strategy to emulate {NOR} flash with {NAND} flash", journal = j-TOS, volume = "6", number = "2", pages = "5:1--5:??", month = jul, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1807060.1807062", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Aug 14 17:04:39 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "This work is motivated by a strong market demand for the replacement of NOR flash memory with NAND flash memory to cut down the cost of many embedded-system designs, such as mobile phones. Different from LRU-related caching or buffering studies, we are interested in prediction-based prefetching based on given execution traces of application executions. An implementation strategy is proposed for the storage of the prefetching information with limited SRAM and run-time overheads. An efficient prediction procedure is presented based on information extracted from application executions to reduce the performance gap between NAND flash memory and NOR flash memory in reads. With the behavior of a target application extracted from a set of collected traces, we show that data access to NOR flash memory can respond effectively over the proposed implementation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "data prefetching; flash memory; NAND; NOR", } @Article{Gim:2010:EIQ, author = "Jongmin Gim and Youjip Won", title = "Extract and infer quickly: Obtaining sector geometry of modern hard disk drives", journal = j-TOS, volume = "6", number = "2", pages = "6:1--6:??", month = jul, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1807060.1807063", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Aug 14 17:04:39 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The modern hard disk drive is a complex and complicated device. It consists of 2--4 heads, thousands of sectors per track, several hundred thousands of tracks, and tens of zones. The beginnings of adjacent tracks are placed with a certain angular offset. Sectors are placed on the tracks and accessed in some order. Angular offset and sector placement order vary widely subject to vendors and models. The success of an efficient file and storage subsystem design relies on the proper understanding of the underlying storage device characteristics. The characterization of hard disk drives has been a subject of intense research for more than a decade. The scale and complexity of state-of-the-art hard disk drive technology calls for a new way of extracting and analyzing the characteristics of the hard disk drive. In this work, we develop a novel disk characterization suite, DIG (Disk Geometry Analyzer), which allows us to rapidly extract and characterize the key performance metrics of the modern hard disk drive. Development of this tool is accompanied by thorough examination of four off-the-shelf hard disk drives. 
DIG consists of three key ingredients: $ O(1) $ track boundary detection algorithm; $ O(\log n) $ zone boundary detection algorithm; and hybrid sampling based seek time profiling. We particularly focus on addressing the scalability aspect of disk characterization. With DIG, we are able to extract key metrics of hard disk drives, for example, track sizes, zone information, sector geometry and so on, within 3--20 minutes. DIG allows us to determine the sector layout mechanism of the underlying hard disk drive, for example, hybrid serpentine, cylinder serpentine, and surface serpentine, and to build a complete sector map from LBN to the three dimensional space of (Cylinder, Head, Sector). Examining the hard disk drives with DIG, we made a number of important observations. In modern hard disk drives, head switch overhead is far greater than track switch overhead. It seems that hard disk drive vendors put greater emphasis on reducing the number of head switches for data access. Most disk vendors use surface serpentine, cylinder serpentine, or hybrid serpentine schemes in laying sectors on the platters. The legacy seek time model, which takes the form of $ a + b \sqrt d $, leaves much to be desired for use in modern hard disk drives especially for short seeks (less than 5000 tracks). We compare the performance of the DIG against the existing state-of-the-art disk profiling algorithm. Compared to the existing state-of-the-art disk characterization algorithm, the DIG algorithm significantly decreases the time to extract comprehensive sector geometry information from 1920 minutes to 7 minutes and 1927 minutes to 180 minutes in best and worst case scenarios, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "Hard disk; performance characterization; sector geometry; seek time; track skew; zone", } @Article{Wang:2010:SSO, author = "Yang Wang and Jiwu Shu and Guangyan Zhang and Wei Xue and Weimin Zheng", title = "{SOPA}: Selecting the optimal caching policy adaptively", journal = j-TOS, volume = "6", number = "2", pages = "7:1--7:??", month = jul, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1807060.1807064", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Aug 14 17:04:39 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "With the development of storage technology and applications, new caching policies are continuously being introduced. It becomes increasingly important for storage systems to be able to select the matched caching policy dynamically under varying workloads. This article proposes SOPA, a cache framework to adaptively select the matched policy and perform policy switches in storage systems. SOPA encapsulates the functions of a caching policy into a module, and enables online policy switching by policy reconstruction. SOPA then selects the policy matched with the workload dynamically by collecting and analyzing access traces. To reduce the decision-making cost, SOPA proposes an asynchronous decision making process. The simulation experiments show that no single caching policy performed well under all of the different workloads. With SOPA, a storage system could select the appropriate policy for different workloads. The real-system evaluation results show that SOPA reduced the average response time by up to 20.3\% and 11.9\% compared with LRU and ARC, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", keywords = "Caching policies; policy adaptation; policy switch", } @Article{Burns:2010:GEF, author = "Randal Burns and Kimberly Keeton", title = "Guest editorial: {FAST'10}", journal = j-TOS, volume = "6", number = "3", pages = "8:1--8:??", month = sep, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1837915.1837916", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Nov 23 10:40:15 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Schroeder:2010:ULS, author = "Bianca Schroeder and Sotirios Damouras and Phillipa Gill", title = "Understanding latent sector errors and how to protect against them", journal = j-TOS, volume = "6", number = "3", pages = "9:1--9:??", month = sep, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1837915.1837917", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Nov 23 10:40:15 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Sehgal:2010:OEP, author = "Priya Sehgal and Vasily Tarasov and Erez Zadok", title = "Optimizing energy and performance for server-class file system workloads", journal = j-TOS, volume = "6", number = "3", pages = "10:1--10:??", month = sep, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1837915.1837918", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Nov 23 10:40:15 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Sundararaman:2010:MOS, author = "Swaminathan Sundararaman and Sriram Subramanian and Abhishek Rajimwale and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau and Michael M. Swift", title = "{Membrane}: {Operating} system support for restartable file systems", journal = j-TOS, volume = "6", number = "3", pages = "11:1--11:??", month = sep, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1837915.1837919", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Nov 23 10:40:15 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Veeraraghavan:2010:QRF, author = "Kaushik Veeraraghavan and Jason Flinn and Edmund B. 
Nightingale and Brian Noble", title = "{quFiles}: {The} right file at the right time", journal = j-TOS, volume = "6", number = "3", pages = "12:1--12:??", month = sep, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1837915.1837920", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Nov 23 10:40:15 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Koller:2010:DUC, author = "Ricardo Koller and Raju Rangaswami", title = "{I/O Deduplication}: {Utilizing} content similarity to improve {I/O} performance", journal = j-TOS, volume = "6", number = "3", pages = "13:1--13:??", month = sep, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1837915.1837921", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Nov 23 10:40:15 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Josephson:2010:DFS, author = "William K. Josephson and Lars A. Bongo and Kai Li and David Flynn", title = "{DFS}: a file system for virtualized flash storage", journal = j-TOS, volume = "6", number = "3", pages = "14:1--14:??", month = sep, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1837915.1837922", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Nov 23 10:40:15 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Shim:2011:HFT, author = "Gyudong Shim and Youngwoo Park and Kyu Ho Park", title = "A hybrid flash translation layer with adaptive merge for {SSDs}", journal = j-TOS, volume = "6", number = "4", pages = "15:1--15:??", month = may, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1970338.1970339", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Jun 7 18:40:46 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Plank:2011:MDR, author = "James S. Plank and Adam L. Buchsbaum and Bradley T. {Vander Zanden}", title = "Minimum density {RAID-6} codes", journal = j-TOS, volume = "6", number = "4", pages = "16:1--16:??", month = may, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1970338.1970340", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Jun 7 18:40:46 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "16", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Tian:2011:OAU, author = "Lei Tian and Qiang Cao and Hong Jiang and Dan Feng and Changsheng Xie and Qin Xin", title = "Online availability upgrades for parity-based {RAIDs} through supplementary parity augmentations", journal = j-TOS, volume = "6", number = "4", pages = "17:1--17:??", month = may, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1970338.1970341", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Jun 7 18:40:46 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "17", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Chang:2011:DLC, author = "Yuan-Hao Chang and Ping-Yi Hsu and Yung-Feng Lu and Tei-Wei Kuo", title = "A driver-layer caching policy for removable storage devices", journal = j-TOS, volume = "7", number = "1", pages = "1:1--1:??", month = jun, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1970343.1970344", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Oct 22 09:33:53 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Tomazic:2011:FFE, author = "Saso Tomazic and Vesna Pavlovic and Jasna Milovanovic and Jaka Sodnik and Anton Kos and Sara Stancin and Veljko Milutinovic", title = "Fast file existence checking in archiving systems", journal = j-TOS, volume = "7", number = "1", pages = "2:1--2:??", month = jun, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1970343.1970345", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Oct 22 09:33:53 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Manzanares:2011:PBP, author = "Adam Manzanares and Xiao Qin and Xiaojun Ruan and Shu Yin", title = "{PRE-BUD}: {Prefetching} for energy-efficient parallel {I/O} systems with buffer disks", journal = j-TOS, volume = "7", number = "1", pages = "3:1--3:??", month = jun, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1970343.1970346", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Oct 22 09:33:53 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Shin:2011:RBI, author = "Dong In Shin and Young Jin Yu and Hyeong S. 
Kim and Hyeonsang Eom and Heon Young Yeom", title = "Request Bridging and Interleaving: Improving the Performance of Small Synchronous Updates under Seek-Optimizing Disk Subsystems", journal = j-TOS, volume = "7", number = "2", pages = "4:1--4:??", month = jul, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1970348.1970349", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Oct 22 09:33:54 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Iliadis:2011:DSV, author = "Ilias Iliadis and Robert Haas and Xiao-Yu Hu and Evangelos Eleftheriou", title = "Disk Scrubbing Versus Intradisk Redundancy for {RAID} Storage Systems", journal = j-TOS, volume = "7", number = "2", pages = "5:1--5:??", month = jul, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1970348.1970350", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Oct 22 09:33:54 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{You:2011:PFE, author = "Lawrence L. You and Kristal T. Pollack and Darrell D. E. Long and K. 
Gopinath", title = "{PRESIDIO}: a Framework for Efficient Archival Data Storage", journal = j-TOS, volume = "7", number = "2", pages = "6:1--6:??", month = jul, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1970348.1970351", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Oct 22 09:33:54 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Brinkmann:2011:GE, author = "Andr{\'e} Brinkmann and David Pease", title = "Guest Editorial", journal = j-TOS, volume = "7", number = "3", pages = "7:1--7:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2027066.2027067", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun Nov 6 06:42:42 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Carns:2011:UIC, author = "Philip Carns and Kevin Harms and William Allcock and Charles Bacon and Samuel Lang and Robert Latham and Robert Ross", title = "Understanding and Improving Computational Science Storage Access through Continuous Characterization", journal = j-TOS, volume = "7", number = "3", pages = "8:1--8:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2027066.2027068", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun Nov 6 06:42:42 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zhang:2011:YCY, author = "Xuechen Zhang and Yuehai Xu and Song Jiang", title = "{YouChoose}: Choosing your Storage Device as a Performance Interface to Consolidated {I/O} Service", journal = j-TOS, volume = "7", number = "3", pages = "9:1--9:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2027066.2027069", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun Nov 6 06:42:42 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Huang:2011:RRT, author = "Zhen Huang and Ernst Biersack and Yuxing Peng", title = "Reducing Repair Traffic in {P2P} Backup Systems: Exact Regenerating Codes on Hierarchical Codes", journal = j-TOS, volume = "7", number = "3", pages = "10:1--10:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2027066.2027070", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun Nov 6 06:42:42 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Xiang:2011:HAF, author = "Liping Xiang and Yinlong Xu and John C. S. Lui and Qian Chang and Yubiao Pan and Runhui Li", title = "A Hybrid Approach to Failed Disk Recovery Using {RAID-6} Codes: Algorithms and Performance Evaluation", journal = j-TOS, volume = "7", number = "3", pages = "11:1--11:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2027066.2027071", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun Nov 6 06:42:42 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Agrawal:2012:EGS, author = "Nitin Agrawal and Leo Arulraj and Andrea C. Arpaci-Dusseau and Remzi H. 
Arpaci-Dusseau", title = "Emulating goliath storage systems with {David}", journal = j-TOS, volume = "7", number = "4", pages = "12:1--12:??", month = jan, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2078861.2078862", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 16 15:48:58 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Benchmarking file and storage systems on large file-system images is important, but difficult and often infeasible. Typically, running benchmarks on such large disk setups is a frequent source of frustration for file-system evaluators; the scale alone acts as a strong deterrent against using larger, albeit realistic, benchmarks. To address this problem, we develop David: a system that makes it practical to run large benchmarks using modest amount of storage or memory capacities readily available on most computers. David creates a `compressed' version of the original file-system image by omitting all file data and laying out metadata more efficiently; an online storage model determines the runtime of the benchmark workload on the original uncompressed image. David works under any file system, as demonstrated in this article with ext3 and btrfs. We find that David reduces storage requirements by orders of magnitude; David is able to emulate a 1-TB target workload using only an 80 GB available disk, while still modeling the actual runtime accurately. David can also emulate newer or faster devices, for example, we show how David can effectively emulate a multidisk RAID using a limited amount of memory.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Sundararaman:2012:MCC, author = "Swaminathan Sundararaman and Yupu Zhang and Sriram Subramanian and Andrea C. 
Arpaci-Dusseau and Remzi H. Arpaci-Dusseau", title = "Making the common case the only case with anticipatory memory allocation", journal = j-TOS, volume = "7", number = "4", pages = "13:1--13:??", month = jan, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2078861.2078863", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 16 15:48:58 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "We present anticipatory memory allocation (AMA), a new method to build kernel code that is robust to memory-allocation failures. AMA avoids the usual difficulties in handling allocation failures through a novel combination of static and dynamic techniques. Specifically, a developer, with assistance from AMA static analysis tools, determines how much memory a particular call into a kernel subsystem will need, and then preallocates said amount immediately upon entry to the kernel; subsequent allocation requests are serviced from the preallocated pool and thus guaranteed never to fail. We describe the static and runtime components of AMA, and then present a thorough evaluation of Linux ext2-mfr, a case study in which we transform the Linux ext2 file system into a memory-failure robust version of itself. Experiments reveal that ext2-mfr avoids memory-allocation failures successfully while incurring little space or time overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Meyer:2012:SPD, author = "Dutch T. Meyer and William J. 
Bolosky", title = "A study of practical deduplication", journal = j-TOS, volume = "7", number = "4", pages = "14:1--14:??", month = jan, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2078861.2078864", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 16 15:48:58 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "We collected file system content data from 857 desktop computers at Microsoft over a span of 4 weeks. We analyzed the data to determine the relative efficacy of data deduplication, particularly considering whole-file versus block-level elimination of redundancy. We found that whole-file deduplication achieves about three quarters of the space savings of the most aggressive block-level deduplication for storage of live file systems, and 87\% of the savings for backup images. We also studied file fragmentation, finding that it is not prevalent, and updated prior file system metadata studies, finding that the distribution of file sizes continues to skew toward very large unstructured files.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wu:2012:AWB, author = "Guanying Wu and Xubin He and Ben Eckart", title = "An adaptive write buffer management scheme for flash-based {SSDs}", journal = j-TOS, volume = "8", number = "1", pages = "1:1--1:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2093139.2093140", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 16 15:48:59 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Solid State Drives (SSD's) have shown promise to be a candidate to replace traditional hard disk drives. 
The benefits of SSD's over HDD's include better durability, higher performance, and lower power consumption, but due to certain physical characteristics of NAND flash, which comprise SSD's, there are some challenging areas of improvement and further research. We focus on the layout and management of the small amount of RAM that serves as a cache between the SSD and the system that uses it. Of the techniques that have previously been proposed to manage this cache, we identify several sources of inefficient cache space management due to the way pages are clustered in blocks and the limited replacement policy. We find that in many traces hot pages reside in otherwise cold blocks, and that the spatial locality of most clusters can be fully exploited in a limited time period, so we develop a hybrid page/block architecture along with an advanced replacement policy, called BPAC, or Block-Page Adaptive Cache, to exploit both temporal and spatial locality. Our technique involves adaptively partitioning the SSD on-disk cache to separately hold pages with high temporal locality in a page list and clusters of pages with low temporal but high spatial locality in a block list. In addition, we have developed a novel mechanism for flash-based SSD's to characterize the spatial locality of the disk I/O workload and an approach to dynamically identify the set of low spatial locality clusters. We run trace-driven simulations to verify our design and find that it outperforms other popular flash-aware cache schemes under different workloads. For instance, compared to a popular flash aware cache algorithm BPLRU, BPAC reduces the number of cache evictions by up to 79.6\% and 34\% on average.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Luo:2012:ESI, author = "Jianqiang Luo and Kevin D. 
Bowers and Alina Oprea and Lihao Xu", title = "Efficient software implementations of large finite fields {$ {\rm GF}(2^n) $} for secure storage applications", journal = j-TOS, volume = "8", number = "1", pages = "2:1--2:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2093139.2093141", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 16 15:48:59 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Finite fields are widely used in constructing error-correcting codes and cryptographic algorithms. In practice, error-correcting codes use small finite fields to achieve high-throughput encoding and decoding. Conversely, cryptographic systems employ considerably larger finite fields to achieve high levels of security. We focus on developing efficient software implementations of arithmetic operations in reasonably large finite fields as needed by secure storage applications. In this article, we study several arithmetic operation implementations for finite fields ranging from $ {\rm GF}(2^{32}) $ to $ {\rm GF}(2^{128}) $. We implement multiplication and division in these finite fields by making use of precomputed tables in smaller fields, and several techniques of extending smaller field arithmetic into larger field operations. We show that by exploiting known techniques, as well as new optimizations, we are able to efficiently support operations over finite fields of interest. We perform a detailed evaluation of several techniques, and show that we achieve very practical performance for both multiplication and division. Finally, we show how these techniques find applications in the implementation of HAIL, a highly available distributed cloud storage layer. 
Using the newly implemented arithmetic operations in $ {\rm GF}(2^{64}) $, HAIL improves its performance by a factor of two, while simultaneously providing a higher level of security.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Chang:2012:COM, author = "Yuan-Hao Chang and Cheng-Kang Hsieh and Po-Chun Huang and Pi-Cheng Hsiu", title = "A caching-oriented management design for the performance enhancement of solid-state drives", journal = j-TOS, volume = "8", number = "1", pages = "3:1--3:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2093139.2093142", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 16 15:48:59 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "While solid-state drives are excellent alternatives to hard disks in mobile devices, a number of performance and reliability issues need to be addressed. In this work, we design an efficient flash management scheme for the performance improvement of low-cost MLC flash memory devices. Specifically, we design an efficient flash management scheme for multi-chipped flash memory devices with cache support, and develop a two-level address translation mechanism with an adaptive caching policy. We evaluated the approach on real workloads. The results demonstrate that it can improve the performance of multi-chipped solid-state drives through logical-to-physical mappings and concurrent accesses to flash chips.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Mao:2012:HHP, author = "Bo Mao and Hong Jiang and Suzhen Wu and Lei Tian and Dan Feng and Jianxi Chen and Lingfang Zeng", title = "{HPDA}: a hybrid parity-based disk array for enhanced performance and reliability", journal = j-TOS, volume = "8", number = "1", pages = "4:1--4:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2093139.2093143", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 16 15:48:59 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Flash-based Solid State Drive (SSD) has been productively shipped and deployed in large scale storage systems. However, a single flash-based SSD cannot satisfy the capacity, performance and reliability requirements of the modern storage systems that support increasingly demanding data-intensive computing applications. Applying RAID schemes to SSDs to meet these requirements, while a logical and viable solution, faces many challenges. In this article, we propose a Hybrid Parity-based Disk Array architecture (short for HPDA), which combines a group of SSDs and two hard disk drives (HDDs) to improve the performance and reliability of SSD-based storage systems. In HPDA, the SSDs (data disks) and part of one HDD (parity disk) compose a RAID4 disk array. Meanwhile, a second HDD and the free space of the parity disk are mirrored to form a RAID1-style write buffer that temporarily absorbs the small write requests and acts as a surrogate set during recovery when a disk fails. The write data is reclaimed to the data disks during the lightly loaded or idle periods of the system. 
Reliability analysis shows that the reliability of HPDA, in terms of MTTDL (Mean Time To Data Loss), is better than that of either pure HDD-based or SSD-based disk array. Our prototype implementation of HPDA and the performance evaluations show that HPDA significantly outperforms either HDD-based or SSD-based disk array.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Klonatos:2012:TOS, author = "Yannis Klonatos and Thanos Makatos and Manolis Marazakis and Michail D. Flouris and Angelos Bilas", title = "Transparent Online Storage Compression at the Block-Level", journal = j-TOS, volume = "8", number = "2", pages = "5:1--5:??", month = may, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180905.2180906", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Nov 6 18:17:34 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "In this work, we examine how transparent block-level compression in the I/O path can improve both the space efficiency and performance of online storage. We present ZBD, a block-layer driver that transparently compresses and decompresses data as they flow between the file-system and storage devices. Our system provides support for variable-size blocks, metadata caching, and persistence, as well as block allocation and cleanup. ZBD targets maintaining high performance, by mitigating compression and decompression overheads that can have a significant impact on performance by leveraging modern multicore CPUs through explicit work scheduling. We present two case-studies for compression. First, we examine how our approach can be used to increase the capacity of SSD-based caches, thus increasing their cost-effectiveness. 
Then, we examine how ZBD can improve the efficiency of online disk-based storage systems. We evaluate our approach in the Linux kernel on a commodity server with multicore CPUs, using PostMark, SPECsfs2008, TPC-C, and TPC-H. Preliminary results show that transparent online block-level compression is a viable option for improving effective storage capacity, it can improve I/O performance up to 80\% by reducing I/O traffic and seek distance, and has a negative impact on performance, up to 34\%, only when single-thread I/O latency is critical. In particular, for SSD-based caching, our results indicate that, in line with current technology trends, compressed caching trades off CPU utilization for performance and enhances SSD efficiency as a storage cache up to 99\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Adams:2012:AWB, author = "Ian F. Adams and Mark W. Storer and Ethan L. Miller", title = "Analysis of Workload Behavior in Scientific and Historical Long-Term Data Repositories", journal = j-TOS, volume = "8", number = "2", pages = "6:1--6:??", month = may, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180905.2180907", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Nov 6 18:17:34 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The scope of archival systems is expanding beyond cheap tertiary storage: scientific and medical data is increasingly digital, and the public has a growing desire to digitally record their personal histories. Driven by the increase in cost efficiency of hard drives, and the rise of the Internet, content archives have become a means of providing the public with fast, cheap access to long-term data. 
Unfortunately, designers of purpose-built archival systems are either forced to rely on workload behavior obtained from a narrow, anachronistic view of archives as simply cheap tertiary storage, or extrapolate from marginally related enterprise workload data and traditional library access patterns. To close this knowledge gap and provide relevant input for the design of effective long-term data storage systems, we studied the workload behavior of several systems within this expanded archival storage space. Our study examined several scientific and historical archives, covering a mixture of purposes, media types, and access models---that is, public versus private. Our findings show that, for more traditional private scientific archival storage, files have become larger, but update rates have remained largely unchanged. However, in the public content archives we observed, we saw behavior that diverges from the traditional ``write-once, read-maybe'' behavior of tertiary storage. Our study shows that the majority of such data is modified---sometimes unnecessarily---relatively frequently, and that indexing services such as Google and internal data management processes may routinely access large portions of an archive, accounting for most of the accesses. Based on these observations, we identify areas for improving the efficiency and performance of archival storage systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hsieh:2012:MDI, author = "Jen-Wei Hsieh and Chung-Hsien Wu and Ge-Ming Chiu", title = "{MFTL}: a Design and Implementation for {MLC} Flash Memory Storage Systems", journal = j-TOS, volume = "8", number = "2", pages = "7:1--7:??", month = may, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180905.2180908", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Nov 6 18:17:34 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "NAND flash memory has gained its popularity in a variety of applications as a storage medium due to its low power consumption, nonvolatility, high performance, physical stability, and portability. In particular, Multi-Level Cell (MLC) flash memory, which provides a lower cost and higher density solution, has occupied the largest part of NAND flash-memory market share. However, MLC flash memory also introduces new challenges: (1) Pages in a block must be written sequentially. (2) Information to indicate a page being obsoleted cannot be recorded in its spare area due to the limitation on the number of partial programming. Since most of applications access NAND flash memory under FAT file system, this article designs an MLC Flash Translation Layer (MFTL) for flash-memory storage systems which takes constraints of MLC flash memory and access behaviors of FAT file system into consideration. A series of trace-driven simulations was conducted to evaluate the performance of the proposed scheme. Although MFTL is designed for MLC flash memory and FAT file system, it is applicable to SLC flash memory and other file systems as well. 
Our experiment results show that the proposed MFTL could achieve a good performance for various access patterns even on SLC flash memory.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Tran:2012:ECB, author = "Nguyen Tran and Frank Chiang and Jinyang Li", title = "Efficient cooperative backup with decentralized trust management", journal = j-TOS, volume = "8", number = "3", pages = "8:1--8:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2339118.2339119", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Nov 6 18:17:35 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Existing backup systems are unsatisfactory: commercial backup services are reliable but expensive while peer-to-peer systems are cheap but offer limited assurance of data reliability. This article introduces Friendstore, a system that provides inexpensive and reliable backup by giving users the choice to store backup data only on nodes they trust (typically those owned by friends and colleagues). Because it is built on trusted nodes, Friendstore is not burdened by the complexity required to cope with potentially malicious participants. Friendstore only needs to detect and repair accidental data loss and to ensure balanced storage exchange. The disadvantage of using only trusted nodes is that Friendstore cannot achieve perfect storage utilization. Friendstore is designed for a heterogeneous environment where nodes have very different access link speeds and available disk spaces. To ensure long-term data reliability, a node with limited upload bandwidth refrains from storing more data than its calculated maintainable capacity. A high bandwidth node might be limited by its available disk space. 
We introduce a simple coding scheme, called XOR(1,2), which doubles a node's ability to store backup information in the same amount of disk space at the cost of doubling the amount of data transferred during restore. Analysis and simulations using long-term node activity traces show that a node can reliably back up tens of gigabytes of data even with low upload bandwidth.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Abd-El-Malek:2012:FSV, author = "Michael Abd-El-Malek and Matthew Wachs and James Cipar and Karan Sanghi and Gregory R. Ganger and Garth A. Gibson and Michael K. Reiter", title = "File system virtual appliances: {Portable} file system implementations", journal = j-TOS, volume = "8", number = "3", pages = "9:1--9:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2339118.2339120", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Nov 6 18:17:35 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "File system virtual appliances (FSVAs) address the portability headaches that plague file system (FS) developers. By packaging their FS implementation in a virtual machine (VM), separate from the VM that runs user applications, they can avoid the need to port the file system to each operating system (OS) and OS version. A small FS-agnostic proxy, maintained by the core OS developers, connects the FSVA to whatever OS the user chooses. This article describes an FSVA design that maintains FS semantics for unmodified FS implementations and provides desired OS and virtualization features, such as a unified buffer cache and VM migration. 
Evaluation of prototype FSVA implementations in Linux and NetBSD, using Xen as the virtual machine manager (VMM), demonstrates that the FSVA architecture is efficient, FS-agnostic, and able to insulate file system implementations from OS differences that would otherwise require explicit porting.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Luo:2012:GXC, author = "Xianghong Luo and Jiwu Shu", title = "Generalized {X-code}: an efficient {RAID-6} code for arbitrary size of disk array", journal = j-TOS, volume = "8", number = "3", pages = "10:1--10:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2339118.2339121", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Nov 6 18:17:35 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Many RAID-6 codes have been proposed in the literature, but each has its limitations. Horizontal code has the ability to adapt to the arbitrary size of a disk array but its high computational complexity is a major shortcoming. In contrast, the computational complexity of vertical code (e.g. X-code) often achieves the theoretical optimality, but vertical code is limited to using a prime number as the size of the disk array. In this article, we propose a novel efficient RAID-6 code for arbitrary size of disk array: generalized X-code. We move the redundant elements along their calculation diagonals in X-code onto two specific disks and change two data elements into redundant elements in order to realize our new code. The generalized X-code achieves optimal encoding and updating complexity and low decoding complexity; in addition, it has the ability to adapt to arbitrary size of disk array.
Furthermore, we also provide a method for generalizing horizontal code to achieve optimal encoding and updating complexity while keeping the code's original ability to adapt to arbitrary size of disk array.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Long:2012:EN, author = "Darrell Long", title = "Editorial note", journal = j-TOS, volume = "8", number = "4", pages = "11:1--11:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2385603.2385604", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun May 5 09:02:36 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Bolosky:2012:ISI, author = "Bill Bolosky and Jason Flinn", title = "Introduction to the special issue {USENIX FAST 2012}", journal = j-TOS, volume = "8", number = "4", pages = "12:1--12:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2385603.2385605", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun May 5 09:02:36 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Shilane:2012:WOR, author = "Philip Shilane and Mark Huang and Grant Wallace and Windsor Hsu", title = "{WAN}-optimized replication of backup datasets using stream-informed delta compression", journal = j-TOS, volume = "8", number = "4", pages = "13:1--13:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2385603.2385606", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun May 5 09:02:36 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Replicating data off site is critical for disaster recovery reasons, but the current approach of transferring tapes is cumbersome and error prone. Replicating across a wide area network (WAN) is a promising alternative, but fast network connections are expensive or impractical in many remote locations, so improved compression is needed to make WAN replication truly practical. We present a new technique for replicating backup datasets across a WAN that not only eliminates duplicate regions of files (deduplication) but also compresses similar regions of files with delta compression, which is available as a feature of EMC Data Domain systems. Our main contribution is an architecture that adds stream-informed delta compression to already existing deduplication systems and eliminates the need for new, persistent indexes. Unlike techniques based on knowing a file's version or that use a memory cache, our approach achieves delta compression across all data replicated to a server at any time in the past. 
From a detailed analysis of datasets and statistics from hundreds of customers using our product, we achieve an additional 2X compression from delta compression beyond deduplication and local compression, which enables customers to replicate data that would otherwise fail to complete within their backup window.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Kim:2012:RSS, author = "Hyojun Kim and Nitin Agrawal and Cristian Ungureanu", title = "Revisiting storage for smartphones", journal = j-TOS, volume = "8", number = "4", pages = "14:1--14:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2385603.2385607", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun May 5 09:02:36 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Conventional wisdom holds that storage is not a big contributor to application performance on mobile devices. Flash storage (the type most commonly used today) draws little power, and its performance is thought to exceed that of the network subsystem. In this article, we present evidence that storage performance does indeed affect the performance of several common applications such as Web browsing, maps, application install, email, and Facebook. For several Android smartphones, we find that just by varying the underlying flash storage, performance over WiFi can typically vary between 100\% and 300\% across applications; in one extreme scenario, the variation jumped to over 2000\%. With a faster network (set up over USB), the performance variation rose even further. 
We identify the reasons for the strong correlation between storage and application performance to be a combination of poor flash device performance, random I/O from application databases, and heavy-handed use of synchronous writes. Based on our findings, we implement and evaluate a set of pilot solutions to address the storage performance deficiencies in smartphones.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Fryer:2012:RVF, author = "Daniel Fryer and Kuei Sun and Rahat Mahmood and Tinghao Cheng and Shaun Benjamin and Ashvin Goel and Angela Demke Brown", title = "{Recon}: Verifying file system consistency at runtime", journal = j-TOS, volume = "8", number = "4", pages = "15:1--15:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2385603.2385608", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun May 5 09:02:36 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "File system bugs that corrupt metadata on disk are insidious. Existing reliability methods, such as checksums, redundancy, or transactional updates, merely ensure that the corruption is reliably preserved. Typical workarounds, based on using backups or repairing the file system, are painfully slow. Worse, the recovery may result in further corruption. We present Recon, a system that protects file system metadata from buggy file system operations. Our approach leverages file systems that provide crash consistency using transactional updates. We define declarative statements called consistency invariants for a file system. These invariants must be satisfied by each transaction being committed to disk to preserve file system integrity. 
Recon checks these invariants at commit, thereby minimizing the damage caused by buggy file systems. The major challenges to this approach are specifying invariants and interpreting file system behavior correctly without relying on the file system code. Recon provides a framework for file-system specific metadata interpretation and invariant checking. We show the feasibility of interpreting metadata and writing consistency invariants for the Linux ext3 file system using this framework. Recon can detect random as well as targeted file-system corruption at runtime as effectively as the offline e2fsck file-system checker, with low overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{You:2013:USL, author = "Gae-Won You and Seung-Won Hwang and Navendu Jain", title = "{Ursa}: Scalable Load and Power Management in Cloud Storage Systems", journal = j-TOS, volume = "9", number = "1", pages = "1:1--1:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435204.2435205", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun May 5 09:02:36 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Enterprise and cloud data centers are comprised of tens of thousands of servers providing petabytes of storage to a large number of users and applications. At such a scale, these storage systems face two key challenges: (1) hot-spots due to the dynamic popularity of stored objects; and (2) high operational costs due to power and cooling. Existing storage solutions, however, are unsuitable to address these challenges because of the large number of servers and data objects. 
This article describes the design, implementation, and evaluation of Ursa, a system that scales to a large number of storage nodes and objects, and aims to minimize latency and bandwidth costs during system reconfiguration. Toward this goal, Ursa formulates an optimization problem that selects a subset of objects from hot-spot servers and performs topology-aware migration to minimize reconfiguration costs. As exact optimization is computationally expensive, we devise scalable approximation techniques for node selection and efficient divide-and-conquer computation. We also show that the same dynamic reconfiguration techniques can be leveraged to reduce power costs by dynamically migrating data off under-utilized nodes, and powering up servers neighboring existing hot-spots to reduce reconfiguration costs. Our evaluation shows that Ursa achieves cost-effective load management, is time-responsive in computing placement decisions (e.g., about two minutes for 10K nodes and 10M objects), and provides power savings of 15\%--37\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hatzieleftheriou:2013:IBE, author = "Andromachi Hatzieleftheriou and Stergios V. Anastasiadis", title = "Improving Bandwidth Efficiency for Consistent Multistream Storage", journal = j-TOS, volume = "9", number = "1", pages = "2:1--2:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435204.2435206", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun May 5 09:02:36 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Synchronous small writes play a critical role in system availability because they safely log recent state modifications for fast recovery from crashes. 
Demanding systems typically dedicate separate devices to logging for adequate performance during normal operation and redundancy during state reconstruction. However, storage stacks enforce page-sized granularity in data transfers from memory to disk. Thus, they consume excessive storage bandwidth to handle small writes, which hurts performance. The problem becomes worse, as filesystems often handle multiple concurrent streams, which effectively generate random I/O traffic. In a journaled filesystem, we introduce wasteless journaling as a mount mode that coalesces synchronous concurrent small writes of data into full page-sized journal blocks. Additionally, we propose selective journaling to automatically activate wasteless journaling on data writes with size below a fixed threshold. We implemented a functional prototype of our design over a widely-used filesystem. Our modes are compared against existing methods using microbenchmarks and application-level workloads on stand-alone servers and a multitier networked system. We examine synchronous and asynchronous writes. Coalescing small data updates to the journal sequentially preserves filesystem consistency while it reduces consumed bandwidth up to several factors, decreases recovery time up to 22\%, and lowers write latency up to orders of magnitude.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Huang:2013:PCF, author = "Cheng Huang and Minghua Chen and Jin Li", title = "{Pyramid Codes}: Flexible Schemes to Trade Space for Access Efficiency in Reliable Data Storage Systems", journal = j-TOS, volume = "9", number = "1", pages = "3:1--3:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435204.2435207", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun May 5 09:02:36 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "We design flexible schemes to explore the tradeoffs between storage space and access efficiency in reliable data storage systems. Aiming at this goal, two new classes of erasure-resilient codes are introduced --- Basic Pyramid Codes (BPC) and Generalized Pyramid Codes (GPC). Both schemes require slightly more storage space than conventional schemes, but significantly improve the critical performance of read during failures and unavailability. As a by-product, we establish a necessary matching condition to characterize the limit of failure recovery, that is, unless the matching condition is satisfied, a failure case is impossible to recover. In addition, we define a maximally recoverable (MR) property. For all ERC schemes holding the MR property, the matching condition becomes sufficient, that is, all failure cases satisfying the matching condition are indeed recoverable. We show that GPC is the first class of non-MDS schemes holding the MR property.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Huang:2013:ERD, author = "Jianzhong Huang and Fenghao Zhang and Xiao Qin and Changsheng Xie", title = "Exploiting Redundancies and Deferred Writes to Conserve Energy in Erasure-Coded Storage Clusters", journal = j-TOS, volume = "9", number = "2", pages = "4:1--4:??", month = jul, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2491472.2491473", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 12 18:12:43 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "We present a power-efficient scheme for erasure-coded storage clusters --- ECS$^2$ --- which aims to offer high energy efficiency with marginal reliability degradation. ECS$^2$ utilizes data redundancies and deferred writes to conserve energy. In ECS$^2$ parity blocks are buffered exclusively in active data nodes whereas parity nodes are placed into low-power mode. $ (k + r, k) $ RS-coded ECS$^2$ can achieve $ \lceil (r + 1) / 2 \rceil $-fault tolerance for $k$ active data nodes and $r$-fault tolerance for all $ k + r $ nodes. ECS$^2$ employs the following three optimizing approaches to improve the energy efficiency of storage clusters. (1) An adaptive threshold policy takes system configurations and I/O workloads into account to maximize standby time periods; (2) a selective activation policy minimizes the number of power-transitions in storage nodes; and (3) a region-based buffer policy speeds up the synchronization process by migrating parity blocks in a batch method. After implementing an ECS$^2$ -based prototype in a Linux cluster, we evaluated its energy efficiency and performance using four different types of I/O workloads. 
The experimental results indicate that compared to energy-oblivious erasure-coded storage, ECS$^2$ can save the energy used by storage clusters up to 29.8\% and 28.0\% in read-intensive and write-dominated workloads when $ k = 6 $ and $ r = 3 $, respectively. The results also show that ECS$^2$ accomplishes high power efficiency in both normal and failed cases without noticeably affecting the I/O performance of storage clusters.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Altiparmak:2013:GOR, author = "Nihat Altiparmak and Ali Saman Tosun", title = "Generalized Optimal Response Time Retrieval of Replicated Data from Storage Arrays", journal = j-TOS, volume = "9", number = "2", pages = "5:1--5:??", month = jul, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2491472.2491474", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 12 18:12:43 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Declustering techniques reduce query response times through parallel I/O by distributing data among parallel disks. Recently, replication-based approaches were proposed to further reduce the response time. Efficient retrieval of replicated data from multiple disks is a challenging problem. Existing retrieval techniques are designed for storage arrays with identical disks, having no initial load or network delay. In this article, we consider the generalized retrieval problem of replicated data where the disks in the system might be heterogeneous, the disks may have initial load, and the storage arrays might be located on different sites. We first formulate the generalized retrieval problem using a Linear Programming (LP) model and solve it with mixed integer programming techniques. 
Next, the generalized retrieval problem is formulated as a more efficient maximum flow problem. We prove that the retrieval schedule returned by the maximum flow technique yields the optimal response time and this result matches the LP solution. We also propose a low-complexity online algorithm for the generalized retrieval problem by not guaranteeing the optimality of the result. Performance of proposed and state of the art retrieval strategies are investigated using various replication schemes, query types, query loads, disk specifications, network delays, and initial loads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Sankar:2013:DSE, author = "Sriram Sankar and Mark Shaw and Kushagra Vaid and Sudhanva Gurumurthi", title = "Datacenter Scale Evaluation of the Impact of Temperature on Hard Disk Drive Failures", journal = j-TOS, volume = "9", number = "2", pages = "6:1--6:24", month = jul, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2491472.2491475", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 12 18:12:43 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "With the advent of cloud computing and online services, large enterprises rely heavily on their datacenters to serve end users. A large datacenter facility incurs increased maintenance costs in addition to service unavailability when there are increased failures. Among different server components, hard disk drives are known to contribute significantly to server failures; however, there is very little understanding of the major determinants of disk failures in datacenters. In this work, we focus on the interrelationship between temperature, workload, and hard disk drive failures in a large scale datacenter. 
We present a dense storage case study from a population housing thousands of servers and tens of thousands of disk drives, hosting a large-scale online service at Microsoft. We specifically establish correlation between temperatures and failures observed at different location granularities: (a) inside drive locations in a server chassis, (b) across server locations in a rack, and (c) across multiple racks in a datacenter. We show that temperature exhibits a stronger correlation to failures than the correlation of disk utilization with drive failures. We establish that variations in temperature are not significant in datacenters and have little impact on failures. We also explore workload impacts on temperature and disk failures and show that the impact of workload is not significant. We then experimentally evaluate knobs that control disk drive temperature, including workload and chassis design knobs. We corroborate our findings from the real data study and show that workload knobs show minimal impact on temperature. Chassis knobs like disk placement and fan speeds have a larger impact on temperature. Finally, we also show the proposed cost benefit of temperature optimizations that increase hard disk drive reliability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wu:2013:SFS, author = "Xiaojian Wu and Sheng Qiu and A. L. 
Narasimha Reddy", title = "{SCMFS}: a File System for Storage Class Memory and its Extensions", journal = j-TOS, volume = "9", number = "3", pages = "7:1--7:??", month = aug, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2501620.2501621", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 12 18:12:47 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", abstract = "Modern computer systems have been built around the assumption that persistent storage is accessed via a slow, block-based interface. However, emerging nonvolatile memory technologies (sometimes referred to as storage class memory (SCM)), are poised to revolutionize storage systems. The SCM devices can be attached directly to the memory bus and offer fast, fine-grained access to persistent storage. In this article, we propose a new file system --- SCMFS, which is specially designed for Storage Class Memory. SCMFS is implemented on the virtual address space and utilizes the existing memory management module of the operating system to help manage the file system space. As a result, we largely simplified the file system operations of SCMFS, which allowed us a better exploration of performance gain from SCM. We have implemented a prototype in Linux and evaluated its performance through multiple benchmarks. The experimental results show that SCMFS outperforms other memory resident file systems, tmpfs, ramfs and ext2 on ramdisk, and achieves about 70\% of memory bandwidth for file read/write operations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Natanzon:2013:DSA, author = "Assaf Natanzon and Eitan Bachmat", title = "Dynamic Synchronous\slash Asynchronous Replication", journal = j-TOS, volume = "9", number = "3", pages = "8:1--8:??", month = aug, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2508011", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 12 18:12:47 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Online, remote, data replication is critical for today's enterprise IT organization. Availability of data is key to the success of the organization. A few hours of downtime can cost from thousands to millions of dollars. With increasing frequency, companies are instituting disaster recovery plans to ensure appropriate data availability in the event of a catastrophic failure or disaster that destroys a site (e.g. flood, fire, or earthquake). Synchronous and asynchronous replication technologies have been available for a long period of time. Synchronous replication has the advantage of no data loss, but due to latency, synchronous replication is limited by distance and bandwidth. Asynchronous replication on the other hand has no distance limitation, but leads to some data loss which is proportional to the data lag. We present a novel method, implemented within EMC Recover-Point, which allows the system to dynamically move between these replication options without any disruption to the I/O path. As latency grows, the system will move from synchronous replication to semi-synchronous replication and then to snapshot shipping. It returns to synchronous replication as more bandwidth is available and latency allows.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Rodeh:2013:BLB, author = "Ohad Rodeh and Josef Bacik and Chris Mason", title = "{BTRFS}: The {Linux} {B}-Tree Filesystem", journal = j-TOS, volume = "9", number = "3", pages = "9:1--9:32", month = aug, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2501620.2501623", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 12 18:12:47 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", abstract = "BTRFS is a Linux filesystem that has been adopted as the default filesystem in some popular versions of Linux. It is based on copy-on-write, allowing for efficient snapshots and clones. It uses B-trees as its main on-disk data structure. The design goal is to work well for many use cases and workloads. To this end, much effort has been directed to maintaining even performance as the filesystem ages, rather than trying to support a particular narrow benchmark use-case. Linux filesystems are installed on smartphones as well as enterprise servers. This entails challenges on many different fronts.\par --- Scalability. The filesystem must scale in many dimensions: disk space, memory, and CPUs.\par --- Data integrity. Losing data is not an option, and much effort is expended to safeguard the content. This includes checksums, metadata duplication, and RAID support built into the filesystem.\par --- Disk diversity. The system should work well with SSDs and hard disks. It is also expected to be able to use an array of different sized disks, which poses challenges to the RAID and striping mechanisms.\par This article describes the core ideas, data structures, and algorithms of this filesystem. 
It sheds light on the challenges posed by defragmentation in the presence of snapshots, and the tradeoffs required to maintain even performance in the face of a wide spectrum of workloads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Jiang:2013:PSE, author = "Song Jiang and Xiaoning Ding and Yuehai Xu and Kei Davis", title = "A Prefetching Scheme Exploiting both Data Layout and Access History on Disk", journal = j-TOS, volume = "9", number = "3", pages = "10:1--10:??", month = aug, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2508010", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 12 18:12:47 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Prefetching is an important technique for improving effective hard disk performance. A prefetcher seeks to accurately predict which data will be requested and load it ahead of the arrival of the corresponding requests. Current disk prefetch policies in major operating systems track access patterns at the level of file abstraction. While this is useful for exploiting application-level access patterns, for two reasons file-level prefetching cannot realize the full performance improvements achievable by prefetching. First, certain prefetch opportunities can only be detected by knowing the data layout on disk, such as the contiguous layout of file metadata or data from multiple files. Second, nonsequential access of disk data (requiring disk head movement) is much slower than sequential access, and the performance penalty for mis-prefetching a randomly located block, relative to that of a sequential block, is correspondingly greater. 
To overcome the inherent limitations of prefetching at logical file level, we propose to perform prefetching directly at the level of disk layout, and in a portable way. Our technique, called DiskSeen, is intended to be supplementary to, and to work synergistically with, any present file-level prefetch policies. DiskSeen tracks the locations and access times of disk blocks and, based on analysis of their temporal and spatial relationships, seeks to improve the sequentiality of disk accesses and overall prefetching performance. It also implements a mechanism to minimize mis-prefetching, on a per-application basis, to mitigate the corresponding performance penalty. Our implementation of the DiskSeen scheme in the Linux 2.6 kernel shows that it can significantly improve the effectiveness of prefetching, reducing execution times by 20\%--60\% for microbenchmarks and real applications such as grep, CVS, and TPC-H. Even for workloads specifically designed to expose its weaknesses, DiskSeen incurs only minor performance loss.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zhang:2013:DEN, author = "Guangyan Zhang and Weimin Zheng and Keqin Li", title = "Design and Evaluation of a New Approach to {RAID-0} Scaling", journal = j-TOS, volume = "9", number = "4", pages = "11:1--11:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2491054", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 12 18:12:51 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Scaling up a RAID-0 volume with added disks can increase its storage capacity and I/O bandwidth simultaneously. For preserving a round-robin data distribution, existing scaling approaches require all the data to be migrated. 
Such large data migration results in a long redistribution time as well as a negative impact on application performance. In this article, we present a new approach to RAID-0 scaling called FastScale. First, FastScale minimizes data migration, while maintaining a uniform data distribution. It moves only enough data blocks from old disks to fill an appropriate fraction of new disks. Second, FastScale optimizes data migration with access aggregation and lazy checkpoint. Access aggregation enables data migration to have a larger throughput due to a decrement of disk seeks. Lazy checkpoint minimizes the number of metadata writes without compromising data consistency. Using several real system disk traces, we evaluate the performance of FastScale through comparison with SLAS, one of the most efficient existing scaling approaches. The experiments show that FastScale can reduce redistribution time by up to 86.06\% with smaller application I/O latencies. The experiments also illustrate that the performance of RAID-0 scaled using FastScale is almost identical to, or even better than, that of the round-robin RAID-0.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Bessani:2013:DDS, author = "Alysson Bessani and Miguel Correia and Bruno Quaresma and Fernando Andr{\'e} and Paulo Sousa", title = "{DepSky}: Dependable and Secure Storage in a Cloud-of-Clouds", journal = j-TOS, volume = "9", number = "4", pages = "12:1--12:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2535929", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 12 18:12:51 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The increasing popularity of cloud storage services has led companies that handle critical data to think about using these services for their storage needs. Medical record databases, large biomedical datasets, historical information about power systems and financial data are some examples of critical data that could be moved to the cloud. However, the reliability and security of data stored in the cloud still remain major concerns. In this work we present DepSky, a system that improves the availability, integrity, and confidentiality of information stored in the cloud through the encryption, encoding, and replication of the data on diverse clouds that form a cloud-of-clouds. We deployed our system using four commercial clouds and used PlanetLab to run clients accessing the service from different countries. We observed that our protocols improved the perceived availability, and in most cases, the access latency, when compared with cloud providers individually. Moreover, the monetary costs of using DepSky in this scenario are at most twice the cost of using a single cloud, which is optimal and seems to be a reasonable cost, given the benefits.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Kwon:2013:HAF, author = "Se Jin Kwon and Hyung-Ju Cho and Tae-Sun Chung", title = "Hybrid Associative Flash Translation Layer for the Performance Optimization of Chip-Level Parallel Flash Memory", journal = j-TOS, volume = "9", number = "4", pages = "13:1--13:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2535931", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 12 18:12:51 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Flash memory is used widely in the data storage market, particularly low-price MultiLevel Cell (MLC) flash memory, which has been adopted by large-scale storage systems despite its low performance. To overcome the poor performance of MLC flash memory, a system architecture has been designed to optimize chip-level parallelism. This design increases the size of the page unit and the block unit, thereby simultaneously executing operations on multiple chips. Unfortunately, its Flash Translation Layer (FTL) generates many unused sectors in each page, which leads to unnecessary write operations. Furthermore, it reuses an earlier log block scheme, although it generates many erase operations because of its low space utilization. To solve these problems, we propose a hybrid associative FTL (Hybrid-FTL) to enhance the performance of the chip-level parallel flash memory system. Hybrid-FTL reduces the number of write operations by utilizing all of the unused sectors. Furthermore, it reduces the overall number of erase operations by classifying data as hot, cold, or fragment data. Hybrid-FTL requires less mapping information in the DRAM and in the flash memory compared with previous FTL algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Xie:2013:EHA, author = "Yulai Xie and Kiran-Kumar Muniswamy-Reddy and Dan Feng and Yan Li and Darrell D. E. Long", title = "Evaluation of a Hybrid Approach for Efficient Provenance Storage", journal = j-TOS, volume = "9", number = "4", pages = "14:1--14:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2501986", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 12 18:12:51 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Provenance is the metadata that describes the history of objects. Provenance provides new functionality in a variety of areas, including experimental documentation, debugging, search, and security. As a result, a number of groups have built systems to capture provenance. Most of these systems focus on provenance collection, a few systems focus on building applications that use the provenance, but all of these systems ignore an important aspect: efficient long-term storage of provenance. In this article, we first analyze the provenance collected from multiple workloads and characterize the properties of provenance with respect to long-term storage. We then propose a hybrid scheme that takes advantage of the graph structure of provenance data and the inherent duplication in provenance data. Our evaluation indicates that our hybrid scheme, a combination of Web graph compression (adapted for provenance) and dictionary encoding, provides the best trade-off in terms of compression ratio, compression time, and query performance when compared to other compression schemes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Lee:2014:UBC, author = "Eunji Lee and Hyokyung Bahn and Sam H. Noh", title = "A Unified Buffer Cache Architecture that Subsumes Journaling Functionality via Nonvolatile Memory", journal = j-TOS, volume = "10", number = "1", pages = "1:1--1:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560010", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Feb 5 16:53:47 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", abstract = "Journaling techniques are widely used in modern file systems as they provide high reliability and fast recovery from system failures. However, it reduces the performance benefit of buffer caching as journaling accounts for a bulk of the storage writes in real system environments. To relieve this problem, we present a novel buffer cache architecture that subsumes the functionality of caching and journaling by making use of nonvolatile memory such as PCM or STT-MRAM. Specifically, our buffer cache supports what we call the in-place commit scheme. This scheme avoids logging, but still provides the same journaling effect by simply altering the state of the cached block to frozen. As a frozen block still provides the functionality of a cache block, we show that in-place commit does not degrade cache performance. We implement our scheme on Linux 2.6.38 and measure the throughput and execution time of the scheme with various file I/O benchmarks. 
The results show that our scheme improves the throughput and execution time by 89\% and 34\% on average, respectively, compared to the existing Linux buffer cache with ext4 without any loss of reliability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Ma:2014:FFF, author = "Ao Ma and Chris Dragga and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau and Marshall Kirk McKusick", title = "{Ffsck}: The Fast File-System Checker", journal = j-TOS, volume = "10", number = "1", pages = "2:1--2:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560011", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Feb 5 16:53:47 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", abstract = "Failures, errors, and bugs can corrupt file systems and cause data loss, despite the presence of journals and similar preventive techniques. While consistency checkers such as fsck can detect corruption and repair a damaged image, they are generally created as an afterthought, to be run only at rare intervals. Thus, checkers operate slowly, causing significant downtime for large scale storage systems. We address this dilemma by treating the checker as a key component of the overall file system, rather than a peripheral add-on. To this end, we present a modified ext3 file system, rext 3, to directly support the fast file-system checker, ffsck. Rext3 colocates and self-identifies its metadata blocks, removing the need for costly seeks and tree traversals during checking. These modifications allow ffsck to scan and repair the file system at rates approaching the full sequential bandwidth of the underlying device. 
In addition, we demonstrate that rext3 generally performs competitively with ext3 and exceeds it in handling random reads and large writes. Finally, we apply our principles to FreeBSD's FFS file system and its checker, doing so in a lightweight fashion that preserves the file-system layout while still providing some of the performance gains from ffsck.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Lu:2014:SLF, author = "Lanyue Lu and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau and Shan Lu", title = "A Study of {Linux} File System Evolution", journal = j-TOS, volume = "10", number = "1", pages = "3:1--3:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560012", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Feb 5 16:53:47 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", abstract = "We conduct a comprehensive study of file-system code evolution. By analyzing eight years of Linux file-system changes across 5079 patches, we derive numerous new (and sometimes surprising) insights into the file-system development process; our results should be useful for both the development of file systems themselves as well as the improvement of bug-finding tools.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Plank:2014:SDS, author = "James S. 
Plank and Mario Blaum", title = "Sector-Disk {(SD)} Erasure Codes for Mixed Failure Modes in {RAID} Systems", journal = j-TOS, volume = "10", number = "1", pages = "4:1--4:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560013", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Feb 5 16:53:47 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Traditionally, when storage systems employ erasure codes, they are designed to tolerate the failures of entire disks. However, the most common types of failures are latent sector failures, which only affect individual disk sectors, and block failures which arise through wear on SSD's. This article introduces SD codes, which are designed to tolerate combinations of disk and sector failures. As such, they consume far less storage resources than traditional erasure codes. We specify the codes with enough detail for the storage practitioner to employ them, discuss their practical properties, and detail an open-source implementation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Choi:2014:THP, author = "Jae Woo Choi and Dong In Shin and Young Jin Yu and Hyeonsang Eom and Heon Young Yeom", title = "Towards High-Performance {SAN} with Fast Storage Devices", journal = j-TOS, volume = "10", number = "2", pages = "5:1--5:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2577385", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Apr 1 05:59:01 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Storage area network (SAN) is one of the most popular solutions for constructing server environments these days. In these kinds of server environments, HDD-based storage usually becomes the bottleneck of the overall system, but it is not enough to merely replace the devices with faster ones in order to exploit their high performance. In other words, proper optimizations are needed to fully utilize their performance gains. In this work, we first adopted a DRAM-based SSD as a fast backend-storage in the existing SAN environment, and found significant performance degradation compared to its own capabilities, especially in the case of small-sized random I/O pattern, even though a high-speed network was used. We have proposed three optimizations to solve this problem: (1) removing software overhead in the SAN I/O path; (2) increasing parallelism in the procedures for handling I/O requests; and (3) adopting the temporal merge mechanism to reduce network overheads. We have implemented them as a prototype and found that our approaches make substantial performance improvements by up to 39\% and 280\% in terms of both the latency and bandwidth, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Mao:2014:RPO, author = "Bo Mao and Hong Jiang and Suzhen Wu and Yinjin Fu and Lei Tian", title = "Read-Performance Optimization for Deduplication-Based Storage Systems in the Cloud", journal = j-TOS, volume = "10", number = "2", pages = "6:1--6:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2512348", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Apr 1 05:59:01 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Data deduplication has been demonstrated to be an effective technique in reducing the total data transferred over the network and the storage space in cloud backup, archiving, and primary storage systems, such as VM (virtual machine) platforms. However, the performance of restore operations from a deduplicated backup can be significantly lower than that without deduplication. The main reason lies in the fact that a file or block is split into multiple small data chunks that are often located in different disks after deduplication, which can cause a subsequent read operation to invoke many disk IOs involving multiple disks and thus degrade the read performance significantly. While this problem has been by and large ignored in the literature thus far, we argue that the time is ripe for us to pay significant attention to it in light of the emerging cloud storage applications and the increasing popularity of the VM platform in the cloud. 
This is because, in a cloud storage or VM environment, a simple read request on the client side may translate into a restore operation if the data to be read or a VM suspended by the user was previously deduplicated when written to the cloud or the VM storage server, a likely scenario considering the network bandwidth and storage capacity concerns in such an environment. To address this problem, in this article, we propose SAR, an SSD (solid-state drive)-Assisted Read scheme, that effectively exploits the high random-read performance properties of SSDs and the unique data-sharing characteristic of deduplication-based storage systems by storing in SSDs the unique data chunks with high reference count, small size, and nonsequential characteristics. In this way, many read requests to HDDs are replaced by read requests to SSDs, thus significantly improving the read performance of the deduplication-based storage systems in the cloud. The extensive trace-driven and VM restore evaluations on the prototype implementation of SAR show that SAR outperforms the traditional deduplication-based and flash-based cache schemes significantly, in terms of the average response times.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Elerath:2014:BMC, author = "Jon G. 
Elerath and Jiri Schindler", title = "Beyond {MTTDL}: a Closed-Form {RAID 6} Reliability Equation", journal = j-TOS, volume = "10", number = "2", pages = "7:1--7:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2577386", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Apr 1 05:59:01 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", note = "See rebuttal \cite{Iliadis:2015:RBM}.", abstract = "We introduce a new closed-form equation for estimating the number of data-loss events for a redundant array of inexpensive disks in a RAID-6 configuration. The equation expresses operational failures, their restorations, latent (sector) defects, and disk media scrubbing by time-based distributions that can represent non-homogeneous Poisson processes. It uses two-parameter Weibull distributions that allows the distributions to take on many different shapes, modeling increasing, decreasing, or constant occurrence rates. This article focuses on the statistical basis of the equation. It also presents time-based distributions of the four processes based on an extensive analysis of field data collected over several years from 10,000s of commercially available systems with 100,000s of disk drives. Our results for RAID-6 groups of size 16 indicate that the closed-form expression yields much more accurate results compared to the MTTDL reliability equation and matching computationally-intensive Monte Carlo simulations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Desnoyers:2014:AMS, author = "Peter Desnoyers", title = "Analytic Models of {SSD} Write Performance", journal = j-TOS, volume = "10", number = "2", pages = "8:1--8:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2577384", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Apr 1 05:59:01 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Solid-state drives (SSDs) update data by writing a new copy, rather than overwriting old data, causing prior copies of the same data to be invalidated. These writes are performed in units of pages, while space is reclaimed in units of multipage erase blocks, necessitating copying of any remaining valid pages in the block before reclamation. The efficiency of this cleaning process greatly affects performance under random workloads; in particular, in SSDs, the write bottleneck is typically internal media throughput, and write amplification due to additional internal copying directly reduces application throughput. We present the first nearly-exact closed-form solution for write amplification under greedy cleaning for uniformly-distributed random traffic, validate its accuracy via simulation, and show that its inaccuracies are negligible for reasonable block sizes and overprovisioning ratios. In addition, we also present the first models which predict performance degradation for both LRW (least-recently-written) cleaning and greedy cleaning under simple nonuniform traffic conditions; simulation results show the first model to be exact and the second to be accurate within 2\%. We extend the LRW model to arbitrary combinations of random traffic and demonstrate its use in predicting cleaning performance for real-world workloads. 
Using these analytic models, we examine the strategy of separating ``hot'' and ``cold'' data, showing that for our traffic model, such separation eliminates any loss in performance due to nonuniform traffic. We then show how a system which segregates hot and cold data into different block pools may shift free space between these pools in order to achieve improved performance, and how numeric methods may be used with our model to find the optimum operating point, which approaches a write amplification of 1.0 for increasingly skewed traffic. We examine online methods for achieving this optimal operating point and show a control strategy based on our model which achieves high performance for a number of real-world block traces.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Miranda:2014:RSE, author = "Alberto Miranda and Sascha Effert and Yangwook Kang and Ethan L. Miller and Ivan Popov and Andre Brinkmann and Tom Friedetzky and Toni Cortes", title = "Random Slicing: Efficient and Scalable Data Placement for Large-Scale Storage Systems", journal = j-TOS, volume = "10", number = "3", pages = "9:1--9:??", month = jul, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2632230", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Aug 12 16:53:23 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The ever-growing amount of data requires highly scalable storage solutions. The most flexible approach is to use storage pools that can be expanded and scaled down by adding or removing storage devices. To make this approach usable, it is necessary to provide a solution to locate data items in such a dynamic environment. 
This article presents and evaluates the Random Slicing strategy, which incorporates lessons learned from table-based, rule-based, and pseudo-randomized hashing strategies and is able to provide a simple and efficient strategy that scales up to handle exascale data. Random Slicing keeps a small table with information about previous storage system insert and remove operations, drastically reducing the required amount of randomness while delivering a perfect load distribution.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Saxena:2014:DPS, author = "Mohit Saxena and Michael M. Swift", title = "Design and Prototype of a Solid-State Cache", journal = j-TOS, volume = "10", number = "3", pages = "10:1--10:??", month = jul, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629491", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Aug 12 16:53:23 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The availability of high-speed solid-state storage has introduced a new tier into the storage hierarchy. Low-latency and high-IOPS solid-state drives (SSDs) cache data in front of high-capacity disks. However, most existing SSDs are designed to be a drop-in disk replacement, and hence are mismatched for use as a cache. This article describes FlashTier, a system architecture built upon a solid-state cache (SSC), which is a flash device with an interface designed for caching. Management software at the operating system block layer directs caching. The FlashTier design addresses three limitations of using traditional SSDs for caching. First, FlashTier provides a unified logical address space to reduce the cost of cache block management within both the OS and the SSD. 
Second, FlashTier provides a new SSC block interface to enable a warm cache with consistent data after a crash. Finally, FlashTier leverages cache behavior to silently evict data blocks during garbage collection to improve performance of the SSC. We first implement an SSC simulator and a cache manager in Linux to perform an in-depth evaluation and analysis of FlashTier's design techniques. Next, we develop a prototype of SSC on the OpenSSD Jasmine hardware platform to investigate the benefits and practicality of FlashTier design. Our prototyping experiences provide insights applicable to managing modern flash hardware, implementing other SSD prototypes and new OS storage stack interface extensions. Overall, we find that FlashTier improves cache performance by up to 168\% over consumer-grade SSDs and up to 52\% over high-end SSDs. It also improves flash lifetime for write-intensive workloads by up to 60\% compared to SSD caches with a traditional flash interface.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Lee:2014:CSH, author = "Eunji Lee and Hyokyung Bahn", title = "Caching Strategies for High-Performance Storage Media", journal = j-TOS, volume = "10", number = "3", pages = "11:1--11:??", month = jul, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2633691", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Aug 12 16:53:23 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Due to the large access latency of hard disks during data retrieval in computer systems, buffer caching mechanisms have been studied extensively in database and operating systems. By storing requested data into the buffer cache, subsequent requests can be directly serviced without accessing slow disk storage. 
Meanwhile, high-speed storage media like PCM (phase-change memory) have emerged recently, and one may wonder if the traditional buffer cache will still be effective for these high-speed storage media.
In LWDLS, probe and prune protocols are introduced that reduce topology mismatch, and a heuristic flooding search algorithm (HFS) is presented that achieves higher search efficiency than pure flooding search while having comparable search speed and coverage to the pure flooding search. LWDLS is lightweight and scalable in terms of incorporating low overhead, high search efficiency, no global state, and avoiding periodic messages. LWDLS is fully distributed and can be used in nondeterministic storage systems and in deterministic storage systems to deal with cases where search is needed. Extensive simulations modeling large-scale High Performance Computing (HPC) storage environments provide representative performance outcomes. Performance is evaluated by metrics including search scope, search efficiency, and average neighbor distance. Results show that LWDLS is able to locate data efficiently with low cost of state maintenance in arbitrary network environments. Through these simulations, we demonstrate the effectiveness of protocols and search algorithm of LWDLS.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Schroeder:2014:ISI, author = "Bianca Schroeder and Eno Thereska", title = "Introduction to the Special Issue on {USENIX FAST 2014}", journal = j-TOS, volume = "10", number = "4", pages = "13:1--13:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2670792", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Oct 31 16:06:21 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Li:2014:SCG, author = "Mingqiang Li and Patrick P. C. Lee", title = "{STAIR} Codes: a General Family of Erasure Codes for Tolerating Device and Sector Failures", journal = j-TOS, volume = "10", number = "4", pages = "14:1--14:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2658991", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Oct 31 16:06:21 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Practical storage systems often adopt erasure codes to tolerate device failures and sector failures, both of which are prevalent in the field. However, traditional erasure codes employ device-level redundancy to protect against sector failures, and hence incur significant space overhead. Recent sector-disk (SD) codes are available only for limited configurations. By making a relaxed but practical assumption, we construct a general family of erasure codes called STAIR codes, which efficiently and provably tolerate both device and sector failures without any restriction on the size of a storage array and the numbers of tolerable device failures and sector failures. We propose the upstairs encoding and downstairs encoding methods, which provide complementary performance advantages for different configurations. We conduct extensive experiments on STAIR codes in terms of space saving, encoding/decoding speed, and update cost. We demonstrate that STAIR codes not only improve space efficiency over traditional erasure codes, but also provide better computational efficiency than SD codes based on our special code construction. 
Finally, we present analytical models that characterize the reliability of STAIR codes, and show that the support of a wider range of configurations by STAIR codes is critical for tolerating sector failure bursts discovered in the field.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Kim:2014:EPC, author = "Hyojun Kim and Sangeetha Seshadri and Clement L. Dickey and Lawrence Chiu", title = "Evaluating Phase Change Memory for Enterprise Storage Systems: a Study of Caching and Tiering Approaches", journal = j-TOS, volume = "10", number = "4", pages = "15:1--15:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2668128", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Oct 31 16:06:21 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Storage systems based on Phase Change Memory (PCM) devices are beginning to generate considerable attention in both industry and academic communities. But whether the technology in its current state will be a commercially and technically viable alternative to entrenched technologies such as flash-based SSDs remains undecided. To address this, it is important to consider PCM SSD devices not just from a device standpoint, but also from a holistic perspective. This article presents the results of our performance study of a recent all-PCM SSD prototype. The average latency for a 4KiB random read is 6.7 $ \mu $ s, which is about $ 16 \times $ faster than a comparable eMLC flash SSD. The distribution of I/O response times is also much narrower than flash SSD for both reads and writes. Based on the performance measurements and real-world workload traces, we explore two typical storage use cases: tiering and caching. 
We report that the IOPS/\$ of a tiered storage system can be improved by 12--66\% and the aggregate elapsed time of a server-side caching solution can be improved by up to 35\% by adding PCM. Our results show that (even at current price points) PCM storage devices show promising performance as a new component in enterprise storage systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Xu:2014:APE, author = "Lianghong Xu and James Cipar and Elie Krevat and Alexey Tumanov and Nitin Gupta and Michael A. Kozuch and Gregory R. Ganger", title = "Agility and Performance in Elastic Distributed Storage", journal = j-TOS, volume = "10", number = "4", pages = "16:1--16:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2668129", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Oct 31 16:06:21 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Elastic storage systems can be expanded or contracted to meet current demand, allowing servers to be turned off or used for other tasks. However, the usefulness of an elastic distributed storage system is limited by its agility: how quickly it can increase or decrease its number of servers. Due to the large amount of data they must migrate during elastic resizing, state of the art designs usually have to make painful trade-offs among performance, elasticity, and agility. This article describes the state of the art in elastic storage and a new system, called SpringFS, that can quickly change its number of active servers, while retaining elasticity and performance goals. SpringFS uses a novel technique, termed bounded write offloading, that restricts the set of servers where writes to overloaded servers are redirected. 
This technique, combined with the read offloading and passive migration policies used in SpringFS, minimizes the work needed before deactivation or activation of servers. Analysis of real-world traces from Hadoop deployments at Facebook and various Cloudera customers and experiments with the SpringFS prototype confirm SpringFS's agility, show that it reduces the amount of data migrated for elastic resizing by up to two orders of magnitude, and show that it cuts the percentage of active servers required by 67--82\%, outdoing state-of-the-art designs by 6--120\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "16", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Fryer:2014:CIT, author = "Daniel Fryer and Mike Qin and Jack Sun and Kah Wai Lee and Angela Demke Brown and Ashvin Goel", title = "Checking the Integrity of Transactional Mechanisms", journal = j-TOS, volume = "10", number = "4", pages = "17:1--17:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2675113", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Oct 31 16:06:21 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Data corruption is the most common consequence of file-system bugs. When such corruption occurs, offline check and recovery tools must be used, but they are error prone and cause significant downtime. Previously we showed that a runtime checker for the Ext3 file system can verify that metadata updates are consistent, helping detect corruption in metadata blocks at transaction commit time. However, corruption can still occur when a bug in the file system's transactional mechanism loses, misdirects, or corrupts writes. 
We show that a runtime checker must enforce the atomicity and durability properties of the file system on every write, in addition to checking transactions at commit time, to provide the strong guarantee that every block write will maintain file system consistency. We identify the invariants that need to be enforced on journaling and shadow paging file systems to preserve the integrity of committed transactions. We also describe the key properties that make it feasible to check these invariants for a file system. Based on this characterization, we have implemented runtime checkers for Ext3 and Btrfs. Our evaluation shows that both checkers detect data corruption effectively, and they can be used during normal operation with low overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "17", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Trifonov:2015:LCI, author = "P. Trifonov", title = "Low-Complexity Implementation of {RAID} Based on {Reed--Solomon} Codes", journal = j-TOS, volume = "11", number = "1", pages = "1:1--1:??", month = feb, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700308", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Feb 24 18:13:03 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Fast algorithms are proposed for encoding and reconstructing data in RAID based on Reed--Solomon codes. The proposed approach is based on the cyclotomic fast Fourier transform algorithm and enables one to significantly reduce the number of expensive Galois field multiplications required. The complexity of the obtained algorithms is much lower than those for existing MDS array codes. Software implementation of the proposed algorithms is discussed. 
The performance results show that the new algorithms provide substantially better performance compared with the standard algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Li:2015:EHI, author = "Yan-Kit Li and Min Xu and Chun-Ho Ng and Patrick P. C. Lee", title = "Efficient Hybrid Inline and Out-of-Line Deduplication for Backup Storage", journal = j-TOS, volume = "11", number = "1", pages = "2:1--2:??", month = feb, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2641572", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Feb 24 18:13:03 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Backup storage systems often remove redundancy across backups via inline deduplication, which works by referring duplicate chunks of the latest backup to those of existing backups. However, inline deduplication degrades restore performance of the latest backup due to fragmentation, and complicates deletion of expired backups due to the sharing of data chunks. While out-of-line deduplication addresses the problems by forward-pointing existing duplicate chunks to those of the latest backup, it introduces additional I/Os of writing and removing duplicate chunks. We design and implement RevDedup, an efficient hybrid inline and out-of-line deduplication system for backup storage. It applies coarse-grained inline deduplication to remove duplicates of the latest backup, and then fine-grained out-of-line reverse deduplication to remove duplicates from older backups. Our reverse deduplication design limits the I/O overhead and prepares for efficient deletion of expired backups. 
Through extensive testbed experiments using synthetic and real-world datasets, we show that RevDedup can bring high performance to the backup, restore, and deletion operations, while maintaining high storage efficiency comparable to conventional inline deduplication.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hwang:2015:HHB, author = "Taeho Hwang and Jaemin Jung and Youjip Won", title = "{HEAPO}: Heap-Based Persistent Object Store", journal = j-TOS, volume = "11", number = "1", pages = "3:1--3:??", month = feb, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629619", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Feb 24 18:13:03 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "In this work, we developed a Heap-Based Persistent Object Store (HEAPO) to manage persistent objects in byte-addressable Nonvolatile RAM (NVRAM). HEAPO defines its own persistent heap layout, the persistent object format, name space organization, object sharing and protection mechanism, and undo-only log-based crash recovery, all of which are effectively tailored for NVRAM. We put our effort into developing a lightweight and flexible layer to exploit the DRAM-like access latency of NVRAM. To address this objective, we developed (i) a native management layer for NVRAM to eliminate redundancy between in-core and on-disk copies of the metadata, (ii) an expandable object format, (iii) a burst trie-based global name space with local name space caching, (iv) static address binding, and (v) minimal logging for undo-only crash recovery. We implemented HEAPO at commodity OS (Linux 2.6.32) and measured the performance. 
By eliminating metadata redundancy, HEAPO improved the speed of creating, attaching, and expanding an object by $ 1.3 \times $, $ 4.5 \times $, and $ 3.8 \times $, respectively, compared to memory-mapped file-based persistent object store. Burst trie-based name space organization of HEAPO yielded $ 7.6 \times $ better lookup performance compared to hashed B-tree-based name space of EXT4. We modified memcachedb to use HEAPO in maintaining its search structure. For hash table update, HEAPO-based memcachedb yielded $ 3.4 \times $ performance improvement against original memcachedb implementation which uses mmap() over ramdisk approach to maintain the key-value store in memory.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wei:2015:ZMZ, author = "Qingsong Wei and Cheng Chen and Mingdi Xue and Jun Yang", title = "{Z-MAP}: a Zone-Based Flash Translation Layer with Workload Classification for Solid-State Drive", journal = j-TOS, volume = "11", number = "1", pages = "4:1--4:??", month = feb, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629663", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Feb 24 18:13:03 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Existing space management and address mapping schemes for flash-based Solid-State-Drive (SSD) operate either at page or block granularity, with inevitable limitations in terms of memory requirement, performance, garbage collection, and scalability. To overcome these limitations, we proposed a novel space management and address mapping scheme for flash referred to as Z-MAP, which manages flash space at granularity of Zone. Each Zone consists of multiple numbers of flash blocks. 
Leveraging workload classification, Z-MAP explores Page-mapping Zone (Page Zone) to store random data and handle a large number of partial updates, and Block-mapping Zone (Block Zone) to store sequential data and lower the overall mapping table. Zones are dynamically allocated and a mapping scheme for a Zone is determined only when it is allocated. Z-MAP uses a small part of Flash memory or phase change memory as a streaming Buffer Zone to log data sequentially and migrate data into Page Zone or Block Zone based on workload classification. A two-level address mapping is designed to reduce the overall mapping table and address translation latency. Z-MAP classifies data before it is permanently stored into Flash memory so that different workloads can be isolated and garbage collection overhead can be minimized. Z-MAP has been extensively evaluated by trace-driven simulation and a prototype implementation on OpenSSD. Our benchmark results conclusively demonstrate that Z-MAP can achieve up to 76\% performance improvement, 81\% mapping table reduction, and 88\% garbage collection overhead reduction compared to existing Flash Translation Layer (FTL) schemes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Gim:2015:SSC, author = "Jongmin Gim and Taeho Hwang and Youjip Won and Krishna Kant", title = "{SmartCon}: Smart Context Switching for Fast Storage {IO} Devices", journal = j-TOS, volume = "11", number = "2", pages = "5:1--5:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2631922", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 24 17:41:03 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Handling of storage IO in modern operating systems assumes that such devices are slow and CPU cycles are valuable. Consequently, to effectively exploit the underlying hardware resources, for example, CPU cycles, storage bandwidth and the like, whenever an IO request is issued to such device, the requesting thread is switched out in favor of another thread that may be ready to execute. Recent advances in nonvolatile storage technologies and multicore CPUs make both of these assumptions increasingly questionable, and an unconditional context switch is no longer desirable. In this article, we propose a novel mechanism called SmartCon, which intelligently decides whether to service a given IO request in interrupt-driven manner or busy-wait--based manner based on not only the device characteristics but also dynamic parameters such as IO latency, CPU utilization, and IO size. We develop an analytic performance model to project the performance of SmartCon for forthcoming devices. We implement SmartCon mechanism on Linux 2.6 and perform detailed evaluation using three different IO devices: Ramdisk, low-end SSD, and high-end SSD. 
We find that SmartCon yields up to a 39\% performance gain over the mainstream block device approach for Ramdisk, and up to a 45\% gain for PCIe-based SSD and SATA-based SSDs. We examine the detailed behavior of TLB, L1, L2 cache and show that SmartCon achieves significant improvement in all cache misbehaviors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Rodeh:2015:VBI, author = "Ohad Rodeh and Haim Helman and David Chambliss", title = "Visualizing Block {IO} Workloads", journal = j-TOS, volume = "11", number = "2", pages = "6:1--6:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2651422", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 24 17:41:03 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Massive block IO systems are the workhorses powering many of today's largest applications. Databases, health care systems, and virtual machine images are examples for block storage applications. The massive scale of these workloads, and the complexity of the underlying storage systems, makes it difficult to pinpoint problems when they occur. This work attempts to shed light on workload patterns through visualization, aiding our intuition. We describe our experience in the last 3 years of analyzing and visualizing customer traces from XIV, an IBM enterprise block storage system. We also present results from applying the same visualization technology to Linux filesystems. We show how visualization aids our understanding of workloads and how it assists in resolving customer performance problems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wu:2015:DSF, author = "Chin-Hsien Wu and Kuo-Yi Huang", title = "Data Sorting in Flash Memory", journal = j-TOS, volume = "11", number = "2", pages = "7:1--7:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2665067", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 24 17:41:03 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Because flash memory now provides an economical solution for various portable devices and embedded systems, an NAND flash-based storage system has replaced the hard disk drive in many applications. Recently, the implementation of database systems using an NAND flash-based storage system has become an important research topic. In particular, the external sorting is an important operation in database systems. With the very distinctive characteristics of flash memory, the typical external sorting system that adopts a clustered sorting process can result in performance degradation and reduce the reliability of flash memory. In this article, we will propose an unclustered sorting method that considers the unique characteristics of flash memory, and we then propose a decision rule to exploit the advantages of both clustered and unclustered sorting. The decision rule can separate records according to their record length, sort them appropriately by the clustered and unclustered sorting, and merge the sorted results. The experimental results show that the proposed method can improve performance in an NAND flash-based storage system (i.e., solid-state drive).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Cho:2015:DTS, author = "Seokhei Cho and Changhyun Park and Youjip Won and Sooyong Kang and Jaehyuk Cha and Sungroh Yoon and Jongmoo Choi", title = "Design Tradeoffs of {SSDs}: From Energy Consumption's Perspective", journal = j-TOS, volume = "11", number = "2", pages = "8:1--8:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2644818", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 24 17:41:03 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "In this work, we studied the energy consumption characteristics of various SSD design parameters. We developed an accurate energy consumption model for SSDs that computes aggregate, as well as component-specific, energy consumption of SSDs in sub-msec time scale. In our study, we used five different FTLs (page mapping, DFTL, block mapping, and two different hybrid mappings) and four different channel configurations (two, four, eight, and 16 channels) under seven different workloads (from large-scale enterprise systems to small-scale desktop applications) in a combinatorial manner. For each combination of the aforementioned parameters, we examined the energy consumption for individual hardware components of an SSD (microcontroller, DRAM, NAND flash, and host interface). The following are some of our findings. First, DFTL is the most energy-efficient address-mapping scheme among the five FTLs we tested due to its good write amplification and small DRAM footprint. Second, a significant fraction of energy is being consumed by idle flash chips waiting for the completion of NAND operations in the other channels. FTL should be designed to fully exploit the internal parallelism so that energy consumption by idle chips is minimized. 
Third, as a means to increase the internal parallelism, increasing way parallelism (the number of flash chips in a channel) is more effective than increasing channel parallelism in terms of peak energy consumption, performance, and hardware complexity. Fourth, in designing high-performance and energy-efficient SSDs, channel switching delay, way switching delay, and page write latency need to be incorporated in an integrated manner to determine the optimal configuration of internal parallelism.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Iliadis:2015:RBM, author = "Ilias Iliadis and Vinodh Venkatesan", title = "Rebuttal to {``Beyond MTTDL: a Closed-Form RAID-6 Reliability Equation''}", journal = j-TOS, volume = "11", number = "2", pages = "9:1--9:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700311", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 24 17:41:03 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", note = "See \cite{Elerath:2014:BMC}.", abstract = "A recent article on the reliability of RAID-6 storage systems overlooks certain relevant prior work published in the past 20 years and concludes that the widely used mean time to data loss (MTTDL) metric does not provide accurate results. In this note, we refute this position by invoking uncited relevant prior work and demonstrating that the MTTDL remains a useful metric.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Chen:2015:EER, author = "Tseng-Yi Chen and Hsin-Wen Wei and Tsung-Tai Yeh and Tsan-Sheng Hsu and Wei-Kuan Shih", title = "An Energy-Efficient and Reliable Storage Mechanism for Data-Intensive Academic Archive Systems", journal = j-TOS, volume = "11", number = "2", pages = "10:1--10:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2720021", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 24 17:41:03 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Previous studies proposed energy-efficient solutions, such as multispeed disks and disk spin-down methods, to conserve power in their respective storage systems. However, in most cases, the authors did not analyze the reliability of their solutions. According to research conducted by Google and the IDEMA standard, frequently setting the disk status to standby mode will increase the disk's Annual Failure Rate and reduce its lifespan. To resolve the issue, we propose an evaluation function called E$^3$ SaRC (Economic Evaluation of Energy Saving with Reliability Constraint), which considers the cost of hardware failure when applying energy-saving schemes. We also present an adaptive write cache mechanism called CacheRAID. The mechanism tries to mitigate the random access problems that implicitly exist in RAID techniques and thereby reduce the energy consumption of RAID disks. CacheRAID also addresses the issue of system reliability by applying a control mechanism to the spin-down algorithm. Our experimental results show that the CacheRAID storage system can reduce the power consumption of the conventional software RAID 5 system by 65\% to 80\%. 
Moreover, according to the E$^3$SaRC measurement, the overall saved cost of CacheRAID is the largest among the systems that we compared.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zhang:2015:FFC, author = "Ji Zhang and Xunfei Jiang and Xiao Qin and Wei-Shinn Ku and Mohammed I. Alghamdi", title = "{Frog}: a Framework for Context-Based File Systems", journal = j-TOS, volume = "11", number = "3", pages = "11:1--11:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2720022", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Aug 7 09:14:17 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "This article presents a framework, Frog, for Context-Based File Systems (CBFSs) that aim at simplifying the development of context-based file systems and applications. Unlike existing informed-based context-aware systems, Frog is a unifying informed-based framework that abstracts context-specific solutions as views, allowing applications to make view selections according to application behaviors. The framework can not only eliminate overheads induced by traditional context analysis, but also simplify the interactions between the context-based file systems and applications. Rather than propagating data through solution-specific interfaces, views in Frog can be selected by inserting their names in file path strings. With Frog in place, programmers can migrate an application from one solution to another by switching among views rather than changing programming interfaces. Since the data consistency issues are automatically enforced by the framework, file-system developers can focus their attention on context-specific solutions. 
We implement two prototypes to demonstrate the strengths and overheads of our design. Inspired by an observation that there are more than 50\% of small files ({$<$4KB}) in a file system, we create a Bi-context Archiving Virtual File System (BAVFS) that utilizes conservative and aggressive prefetching for the contexts of random and sequential reads. To improve the performance of random read-and-write operations, the Bi-context Hybrid Virtual File System (BHVFS) combines the update-in-place and update-out-of-place solutions for read-intensive and write-intensive contexts. Our experimental results show that the benefits of Frog-based CBFSs outweigh the overheads introduced by integrating multiple context-specific solutions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wei:2015:AFS, author = "Qingsong Wei and Jianxi Chen and Cheng Chen", title = "Accelerating File System Metadata Access with Byte-Addressable Nonvolatile Memory", journal = j-TOS, volume = "11", number = "3", pages = "12:1--12:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2766453", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Aug 7 09:14:17 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "File system performance is dominated by small and frequent metadata access. Metadata is stored as blocks on the hard disk drive. Partial metadata update results in whole-block read or write, which significantly amplifies disk I/O. Furthermore, a huge performance gap between the CPU and disk aggravates this problem. 
In this article, a file system metadata accelerator (referred to as FSMAC) is proposed to optimize metadata access by efficiently exploiting the persistency and byte-addressability of Nonvolatile Memory (NVM). The FSMAC decouples data and metadata access path, putting data on disk and metadata in byte-addressable NVM at runtime. Thus, data is accessed in a block from the I/O bus and metadata is accessed in a byte-addressable manner from the memory bus. Metadata access is significantly accelerated and metadata I/O is eliminated because metadata in NVM is no longer flushed back to the disk periodically. A lightweight consistency mechanism combining fine-grained versioning and transaction is introduced in the FSMAC. The FSMAC is implemented on a real NVDIMM platform and intensively evaluated under different workloads. Evaluation results show that the FSMAC accelerates the file system up to 49.2 times for synchronized I/O and 7.22 times for asynchronized I/O. Moreover, it can achieve significant performance speedup in network storage and database environment, especially for metadata-intensive or write-dominated workloads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Li:2015:TOA, author = "Zhichao Li and Ming Chen and Amanpreet Mukker and Erez Zadok", title = "On the Trade-Offs among Performance, Energy, and Endurance in a Versatile Hybrid Drive", journal = j-TOS, volume = "11", number = "3", pages = "13:1--13:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700312", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Aug 7 09:14:17 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "There are trade-offs among performance, energy, and device endurance for storage systems. 
Designs optimized for one dimension or workload often suffer in another. Therefore, it is important to study the trade-offs to enable adaptation to workloads and dimensions. As Flash SSD has emerged, hybrid drives have been studied more closely. However, hybrids are mainly designed for high throughput, efficient energy consumption, or improving endurance---leaving quantitative study on the trade-offs unexplored. Past endurance studies also lack a concrete model to help study the trade-offs. Last, previous designs are often based on inflexible policies that cannot adapt easily to changing conditions. We designed and developed GreenDM, a versatile hybrid drive that combines Flash-based SSDs with traditional HDDs. The SSD can be used as cache or as primary storage for hot data. We present our endurance model together with GreenDM to study these trade-offs. GreenDM presents a block interface and requires no modifications to existing software. GreenDM offers tunable parameters to enable the system to adapt to many workloads. We have designed, developed, and carefully evaluated GreenDM with a variety of workloads using commodity SSD and HDD drives. We demonstrate the importance of versatility to enable adaptation to various workloads and dimensions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Miao:2015:ISS, author = "Youshan Miao and Wentao Han and Kaiwei Li and Ming Wu and Fan Yang and Lidong Zhou and Vijayan Prabhakaran and Enhong Chen and Wenguang Chen", title = "{ImmortalGraph}: a System for Storage and Analysis of Temporal Graphs", journal = j-TOS, volume = "11", number = "3", pages = "14:1--14:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700302", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Aug 7 09:14:17 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Temporal graphs that capture graph changes over time are attracting increasing interest from research communities, for functions such as understanding temporal characteristics of social interactions on a time-evolving social graph. ImmortalGraph is a storage and execution engine designed and optimized specifically for temporal graphs. Locality is at the center of ImmortalGraph's design: temporal graphs are carefully laid out in both persistent storage and memory, taking into account data locality in both time and graph-structure dimensions. ImmortalGraph introduces the notion of locality-aware batch scheduling in computation, so that common ``bulk'' operations on temporal graphs are scheduled to maximize the benefit of in-memory data locality. The design of ImmortalGraph explores an interesting interplay among locality, parallelism, and incremental computation in supporting common mining tasks on temporal graphs. The result is a high-performance temporal-graph system that is up to 5 times more efficient than existing database solutions for graph queries. 
The locality optimizations in ImmortalGraph offer up to an order of magnitude speedup for temporal iterative graph mining compared to a straightforward application of existing graph engines on a series of snapshots.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Schindler:2015:ISI, author = "Jiri Schindler and Erez Zadok", title = "Introduction to the Special Issue on {USENIX FAST 2015}", journal = j-TOS, volume = "11", number = "4", pages = "15:1--15:??", month = nov, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2825000", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jan 25 07:23:46 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Aghayev:2015:SWS, author = "Abutalib Aghayev and Mansour Shafaei and Peter Desnoyers", title = "{Skylight} --- a Window on Shingled Disk Operation", journal = j-TOS, volume = "11", number = "4", pages = "16:1--16:??", month = nov, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2821511", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jan 25 07:23:46 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "We introduce Skylight, a novel methodology that combines software and hardware techniques to reverse engineer key properties of drive-managed Shingled Magnetic Recording (SMR) drives. 
The software part of Skylight measures the latency of controlled I/O operations to infer important properties of drive-managed SMR, including type, structure, and size of the persistent cache; type of cleaning algorithm; type of block mapping; and size of bands. The hardware part of Skylight tracks drive head movements during these tests, using a high-speed camera through an observation window drilled through the cover of the drive. These observations not only confirm inferences from measurements, but resolve ambiguities that arise from the use of latency measurements alone. We show the generality and efficacy of our techniques by running them on top of three emulated and two real SMR drives, discovering valuable performance-relevant details of the behavior of the real SMR drives.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "16", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Ma:2015:RCM, author = "Ao Ma and Rachel Traylor and Fred Douglis and Mark Chamness and Guanlin Lu and Darren Sawyer and Surendar Chandra and Windsor Hsu", title = "{RAIDShield}: Characterizing, Monitoring, and Proactively Protecting Against Disk Failures", journal = j-TOS, volume = "11", number = "4", pages = "17:1--17:??", month = nov, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2820615", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jan 25 07:23:46 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Modern storage systems orchestrate a group of disks to achieve their performance and reliability goals. Even though such systems are designed to withstand the failure of individual disks, failure of multiple disks poses a unique set of challenges. 
We empirically investigate disk failure data from a large number of production systems, specifically focusing on the impact of disk failures on RAID storage systems. Our data covers about one million SATA disks from six disk models for periods up to 5 years. We show how observed disk failures weaken the protection provided by RAID. The count of reallocated sectors correlates strongly with impending failures. With these findings we designed RAIDShield, which consists of two components. First, we have built and evaluated an active defense mechanism that monitors the health of each disk and replaces those that are predicted to fail imminently. This proactive protection has been incorporated into our product and is observed to eliminate 88\% of triple disk errors, which are 80\% of all RAID failures. Second, we have designed and simulated a method of using the joint failure probability to quantify and predict how likely a RAID group is to face multiple simultaneous disk failures, which can identify disks that collectively represent a risk of failure even when no individual disk is flagged in isolation. We find in simulation that RAID-level analysis can effectively identify most vulnerable RAID-6 systems, improving the coverage to 98\% of triple errors. We conclude with discussions of operational considerations in deploying RAIDShield more broadly and new directions in the analysis of disk errors. One interesting approach is to combine multiple metrics, allowing the values of different indicators to be used for predictions. Using newer field data that reports an additional metric, medium errors, we find that the relative efficacy of reallocated sectors and medium errors varies across disk models, offering an additional way to predict failures.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "17", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Jannen:2015:BWO, author = "William Jannen and Jun Yuan and Yang Zhan and Amogh Akshintala and John Esmet and Yizheng Jiao and Ankur Mittal and Prashant Pandey and Phaneendra Reddy and Leif Walsh and Michael A. Bender and Martin Farach-Colton and Rob Johnson and Bradley C. Kuszmaul and Donald E. Porter", title = "{BetrFS}: Write-Optimization in a Kernel File System", journal = j-TOS, volume = "11", number = "4", pages = "18:1--18:??", month = nov, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2798729", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jan 25 07:23:46 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The B$^\epsilon $-tree File System, or BetrFS (pronounced ``better eff ess''), is the first in-kernel file system to use a write-optimized data structure (WODS). WODS are promising building blocks for storage systems because they support both microwrites and large scans efficiently. Previous WODS-based file systems have shown promise but have been hampered in several ways, which BetrFS mitigates or eliminates altogether. For example, previous WODS-based file systems were implemented in user space using FUSE, which superimposes many reads on a write-intensive workload, reducing the effectiveness of the WODS. This article also contributes several techniques for exploiting write-optimization within existing kernel infrastructure. BetrFS dramatically improves performance of certain types of large scans, such as recursive directory traversals, as well as performance of arbitrary microdata operations, such as file creates, metadata updates, and small writes to files. BetrFS can make small, random updates within a large file 2 orders of magnitude faster than other local file systems. 
BetrFS is an ongoing prototype effort and requires additional data-structure tuning to match current general-purpose file systems on some operations, including deletes, directory renames, and large sequential writes. Nonetheless, many applications realize significant performance improvements on BetrFS. For instance, an in-place rsync of the Linux kernel source sees roughly 1.6--22 $ \times $ speedup over commodity file systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "18", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{DeCapitaniDiVimercati:2015:SIE, author = "Sabrina {De Capitani Di Vimercati} and Sara Foresti and Stefano Paraboschi and Gerardo Pelosi and Pierangela Samarati", title = "Shuffle Index: Efficient and Private Access to Outsourced Data", journal = j-TOS, volume = "11", number = "4", pages = "19:1--19:??", month = nov, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2747878", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Jan 25 07:23:46 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Data outsourcing and cloud computing have been emerging at an ever-growing rate as successful approaches for allowing users and companies to rely on external services for storing and managing data. As data and access to them are not under the control of the data owner, there is a clear need to provide proper confidentiality protection. Such requirements concern the confidentiality not only of the stored data (content) but also of the specific accesses (or patterns of them) that users make on such data. In this article, we address these issues and propose an approach for guaranteeing content, access, and pattern confidentiality in a data outsourcing scenario. 
The proposed solution is based on the definition of a shuffle index structure, which adapts traditional B+-trees and, by applying a combination of techniques (covers, caches, and shuffling), ensures confidentiality of the data and of queries over them, protecting each single access as well as sequences thereof. The proposed solution also supports update operations over the data, while making reads and writes not recognizable as such by the server. We show that the shuffle index exhibits a limited performance cost, thus resulting effectively usable in practice.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "19", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Desnoyers:2016:ISI, author = "Peter Desnoyers and James Hughes", title = "Introduction to the Special Issue on {MSST 2015}", journal = j-TOS, volume = "12", number = "1", pages = "1:1--1:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2853993", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 29 06:03:46 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Jones:2016:CDR, author = "Stephanie N. Jones and Ahmed Amer and Ethan L. Miller and Darrell D. E. Long and Rekha Pitchumani and Christina R. 
Strong", title = "Classifying Data to Reduce Long-Term Data Movement in Shingled Write Disks", journal = j-TOS, volume = "12", number = "1", pages = "2:1--2:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2851505", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 29 06:03:46 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Shingled magnetic recording (SMR) is a means of increasing the density of hard drives that brings a new set of challenges. Due to the nature of SMR disks, updating in place is not an option. Holes left by invalidated data can only be filled if the entire band is reclaimed, and a poor band compaction algorithm could result in spending a lot of time moving blocks over the lifetime of the device. We propose using write frequency to separate blocks to reduce data movement and develop a band compaction algorithm that implements this heuristic. We demonstrate how our algorithm results in improved data management, resulting in an up to 45\% reduction in required data movements when compared to naive approaches to band management.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Lu:2016:BPE, author = "Youyou Lu and Jiwu Shu and Long Sun", title = "Blurred Persistence: Efficient Transactions in Persistent Memory", journal = j-TOS, volume = "12", number = "1", pages = "3:1--3:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2851504", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 29 06:03:46 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Persistent memory provides data durability in main memory and enables memory-level storage systems. To ensure consistency of such storage systems, memory writes need to be transactional and are carefully moved across the boundary between the volatile CPU cache and the persistent main memory. Unfortunately, cache management in the CPU cache is hardware-controlled. Legacy transaction mechanisms, which are designed for disk-based storage systems, are inefficient in ordered data persistence of transactions in persistent memory. In this article, we propose the Blurred Persistence mechanism to reduce the transaction overhead of persistent memory by blurring the volatility-persistence boundary. Blurred Persistence consists of two techniques. First, Execution in Log executes a transaction in the log to eliminate duplicated data copies for execution. It allows persistence of the volatile uncommitted data, which are detectable with reorganized log structure. Second, Volatile Checkpoint with Bulk Persistence allows the committed data to aggressively stay volatile by leveraging the data durability in the log, as long as the commit order across threads is kept. By doing so, it reduces the frequency of forced persistence and improves cache efficiency. 
Evaluations show that our mechanism improves system performance by 56.3\% to 143.7\% for a variety of workloads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Dragga:2016:GGC, author = "Chris Dragga and Douglas J. Santry", title = "{GCTrees}: Garbage Collecting Snapshots", journal = j-TOS, volume = "12", number = "1", pages = "4:1--4:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2857056", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 29 06:03:46 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "File-system snapshots have been a key component of enterprise storage management since their inception. Creating and managing them efficiently, while maintaining flexibility and low overhead, has been a constant struggle. Although the current state-of-the-art mechanism---hierarchical reference counting---performs reasonably well for traditional small-file workloads, these workloads are increasingly vanishing from the enterprise data center, replaced instead with virtual machine and database workloads. These workloads center around a few very large files, violating the assumptions that allow hierarchical reference counting to operate efficiently. To better cope with these workloads, we introduce Generational Chain Trees (GCTrees), a novel method of space management that uses concepts of block lineage across snapshots rather than explicit reference counting. As a proof of concept, we create a prototype file system---gcext4, a modified version of ext4 that uses GCTrees as a basis for snapshots and copy-on-write. 
In evaluating this prototype empirically, we find that although they have a somewhat higher overhead for traditional workloads, GCTrees have dramatically lower overhead than hierarchical reference counting for large-file workloads, improving by a factor of 34 or more in some cases. Furthermore, gcext4 performs comparably to ext4 across all workloads, showing that GCTrees impose minor cost for their benefits.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Grawinkel:2016:LRM, author = "Matthias Grawinkel and Lars Nagel and Andr{\'e} Brinkmann", title = "{LoneStar RAID}: Massive Array of Offline Disks for Archival Systems", journal = j-TOS, volume = "12", number = "1", pages = "5:1--5:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2840810", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 29 06:03:46 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The need for huge storage archives rises with the ever growing creation of data. With today's big data and data analytics applications, some of these huge archives become active in the sense that all stored data can be accessed at any time. Running and evolving these archives is a constant tradeoff between performance, capacity, and price. We present the LoneStar RAID, a disk-based storage architecture, which focuses on high reliability, low energy consumption, and cheap reads. It is designed for MAID systems with up to hundreds of disk drives per server and is optimized for ``write once, read sometimes'' workloads. We use dedicated data and parity disks, and export the data disks as individually accessible buckets. 
By intertwining disk groups into a two-dimensional RAID and improving single-disk reliability with intradisk redundancy, the system achieves an elastic fault tolerance that can at least recover all 3-disk failures. Furthermore, we integrate a cache to offload parity updates and a journal to track the RAID's state. The LoneStar RAID scheme provides a mean time to data loss (MTTDL) that competes with today's erasure codes and is optimized to require only a minimal set of running disk drives.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Jung:2016:NHF, author = "Myoungsoo Jung and Wonil Choi and Shuwen Gao and Ellis Herbert {Wilson III} and David Donofrio and John Shalf and Mahmut Taylan Kandemir", title = "{NANDFlashSim}: High-Fidelity, Microarchitecture-Aware {NAND} Flash Memory Simulation", journal = j-TOS, volume = "12", number = "2", pages = "6:1--6:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2700310", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Jun 8 16:03:39 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "As the popularity of NAND flash expands in arenas from embedded systems to high-performance computing, a high-fidelity understanding of its specific properties becomes increasingly important. Further, with the increasing trend toward multiple-die, multiple-plane architectures and high-speed interfaces, flash memory systems are expected to continue to scale and cheapen, resulting in their broader proliferation. 
However, when designing NAND-based devices, making decisions about the optimal system configuration is nontrivial, because flash is sensitive to a number of parameters and suffers from inherent latency variations, and no available tools suffice for studying these nuances. The parameters include the architectures, such as multidie and multiplane, diverse node technologies, bit densities, and cell reliabilities. Therefore, we introduce NANDFlashSim, a high-fidelity, latency-variation-aware, and highly configurable NAND-flash simulator, which implements a detailed timing model for 16 state-of-the-art NAND operations. Using NANDFlashSim, we notably discover the following. First, regardless of the operation, reads fail to leverage internal parallelism. Second, MLC provides lower I/O bus contention than SLC, but contention becomes a serious problem as the number of dies increases. Third, many-die architectures outperform many-plane architectures for disk-friendly workloads. Finally, employing a high-performance I/O bus or an increased page size does not enhance energy savings. Our simulator is available at http://nfs.camelab.org.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wildani:2016:CWG, author = "Avani Wildani and Ethan L. Miller", title = "Can We Group Storage? 
{Statistical} Techniques to Identify Predictive Groupings in Storage System Accesses", journal = j-TOS, volume = "12", number = "2", pages = "7:1--7:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2738042", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Jun 8 16:03:39 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Storing large amounts of data for different users has become the new normal in a modern distributed cloud storage environment. Storing data successfully requires a balance of availability, reliability, cost, and performance. Typically, systems design for this balance with minimal information about the data that will pass through them. We propose a series of methods to derive groupings from data that have predictive value, informing layout decisions for data on disk. Unlike previous grouping work, we focus on dynamically identifying groupings in data that can be gathered from active systems in real time with minimal impact using spatiotemporal locality. We outline several techniques we have developed and discuss how we select particular techniques for particular workloads and application domains. Our statistical and machine-learning-based grouping algorithms answer questions such as ``What can a grouping be based on?'' and ``Is a given grouping meaningful for a given application?'' We design our models to be flexible and require minimal domain information so that our results are as broadly applicable as possible. We intend for this work to provide a launchpad for future specialized system design using groupings in combination with caching policies and architectural distinctions such as tiered storage to create the next generation of scalable storage systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Huang:2016:IFB, author = "Sai Huang and Qingsong Wei and Dan Feng and Jianxi Chen and Cheng Chen", title = "Improving Flash-Based Disk Cache with Lazy Adaptive Replacement", journal = j-TOS, volume = "12", number = "2", pages = "8:1--8:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2737832", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Jun 8 16:03:39 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "For years, the increasing popularity of flash memory has been changing storage systems. Flash-based solid-state drives (SSDs) are widely used as a new cache tier on top of hard disk drives (HDDs) to speed up data-intensive applications. However, the endurance problem of flash memory remains a concern and is getting worse with the adoption of MLC and TLC flash. In this article, we propose a novel cache management algorithm for flash-based disk cache named Lazy Adaptive Replacement Cache (LARC). LARC adopts the idea of selective caching to filter out seldom accessed blocks and prevent them from entering cache. This avoids cache pollution and preserves popular blocks in cache for a longer period of time, leading to a higher hit rate. Meanwhile, by avoiding unnecessary cache replacements, LARC reduces the volume of data written to the SSD and yields an SSD-friendly access pattern. In this way, LARC improves the performance and endurance of the SSD at the same time. LARC is self-tuning and incurs little overhead. It has been extensively evaluated by both trace-driven simulations and synthetic benchmarks on a prototype implementation. 
Our experiments show that LARC outperforms state-of-the-art algorithms for different kinds of workloads and extends SSD lifetime by up to 15.7 times.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zhang:2016:EDP, author = "Yihua Zhang and Marina Blanton", title = "Efficient Dynamic Provable Possession of Remote Data via Update Trees", journal = j-TOS, volume = "12", number = "2", pages = "9:1--9:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2747877", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Jun 8 16:03:39 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The emergence and wide availability of remote storage service providers prompted work in the security community that allows clients to verify integrity and availability of the data that they outsourced to a not fully trusted remote storage server at a relatively low cost. Most recent solutions to this problem allow clients to read and update (i.e., insert, modify, or delete) stored data blocks while trying to lower the overhead associated with verifying the integrity of the stored data. In this work, we develop a novel scheme, performance of which favorably compares with the existing solutions. Our solution additionally enjoys a number of new features, such as a natural support for operations on ranges of blocks, revision control, and support for multiple user access to shared content. The performance guarantees that we achieve stem from a novel data structure called a balanced update tree and removing the need for interaction during update operations in addition to communicating the updates themselves.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wang:2016:SIW, author = "Wei Wang and Tao Xie and Abhinav Sharma", title = "{SWANS}: an Interdisk Wear-Leveling Strategy for {RAID-0} Structured {SSD} Arrays", journal = j-TOS, volume = "12", number = "3", pages = "10:1--10:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2756555", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:06 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "NAND flash memory-based solid state disks (SSDs) have been widely used in enterprise servers. However, flash memory has limited write endurance, as a block becomes unreliable after a finite number of program/erase cycles. Existing wear-leveling techniques are essentially intradisk data distribution schemes, as they can only even wear out across the flash medium within a single SSD. When multiple SSDs are organized in an array manner in server applications, an interdisk wear-leveling technique, which can ensure a uniform wear-out distribution across SSDs, is much needed. In this article, we propose a novel SSD-array level wear-leveling strategy called SWANS (Smoothing Wear Across NSSDs) for an SSD array structured in a RAID-0 format, which is frequently used in server applications. SWANS dynamically monitors and balances write distributions across SSDs in an intelligent way. Further, to evaluate its effectiveness, we build an SSD array simulator on top of a validated single SSD simulator. Next, SWANS is implemented in its array controller. Comprehensive experiments with real-world traces show that SWANS decreases the standard deviation of writes across SSDs on average by 16.7x. 
The gap in the total bytes written between the most written SSD and the least written SSD in an 8-SSD array shrinks at least 1.3x.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Moon:2016:DRI, author = "Sangwhan Moon and A. L. Narasimha Reddy", title = "Does {RAID} Improve Lifetime of {SSD} Arrays?", journal = j-TOS, volume = "12", number = "3", pages = "11:1--11:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2764915", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:06 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Parity protection at the system level is typically employed to compose reliable storage systems. However, careful consideration is required when SSD-based systems employ parity protection. First, additional writes are required for parity updates. Second, parity consumes space on the device, which results in write amplification from less efficient garbage collection at higher space utilization. This article analyzes the effectiveness of SSD-based RAID and discusses the potential benefits and drawbacks in terms of reliability. A Markov model is presented to estimate the lifetime of SSD-based RAID systems in different environments. In a small array, our results show that parity protection provides benefit only with considerably low space utilizations and low data access rates. However, in a large system, RAID improves data lifetime even when we take write amplification into account.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Kang:2016:MPV, author = "Junbin Kang and Chunming Hu and Tianyu Wo and Ye Zhai and Benlong Zhang and Jinpeng Huai", title = "{MultiLanes}: Providing Virtualized Storage for {OS}-Level Virtualization on Manycores", journal = j-TOS, volume = "12", number = "3", pages = "12:1--12:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2801155", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:06 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "OS-level virtualization is often used for server consolidation in data centers because of its high efficiency. However, the sharing of storage stack services among the colocated containers incurs contention on shared kernel data structures and locks within I/O stack, leading to severe performance degradation on manycore platforms incorporating fast storage technologies (e.g., SSDs based on nonvolatile memories). This article presents MultiLanes, a virtualized storage system for OS-level virtualization on manycores. MultiLanes builds an isolated I/O stack on top of a virtualized storage device for each container to eliminate contention on kernel data structures and locks between them, thus scaling them to manycores. Meanwhile, we propose a set of techniques to tune the overhead induced by storage-device virtualization to be negligible, and to scale the virtualized devices to manycores on the host, which itself scales poorly. 
To reduce the contention within each single container, we further propose SFS, which runs multiple file-system instances through the proposed virtualized storage devices, distributes all files under each directory among the underlying file-system instances, then stacks a unified namespace on top of them. The evaluation of our prototype system built for Linux container (LXC) on a 32-core machine with both a RAM disk and a modern flash-based SSD demonstrates that MultiLanes scales much better than Linux in micro- and macro-benchmarks, bringing significant performance improvements, and that MultiLanes with SFS can further reduce the contention within each single container.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Chen:2016:IPF, author = "Feng Chen and Binbing Hou and Rubao Lee", title = "Internal Parallelism of Flash Memory-Based Solid-State Drives", journal = j-TOS, volume = "12", number = "3", pages = "13:1--13:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2818376", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:06 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "A unique merit of a solid-state drive (SSD) is its internal parallelism. In this article, we present a set of comprehensive studies on understanding and exploiting internal parallelism of SSDs. Through extensive experiments and thorough analysis, we show that exploiting internal parallelism of SSDs can not only substantially improve input/output (I/O) performance but also may lead to some surprising side effects and dynamics. 
For example, we find that with parallel I/Os, SSD performance is no longer highly sensitive to access patterns (random or sequential), but rather to other factors, such as data access interferences and physical data layout. Many of our prior understandings about SSDs also need to be reconsidered. For example, we find that with parallel I/Os, write performance could outperform reads and is largely independent of access patterns, which is opposite to our long-existing common understanding about slow random writes on SSDs. We have also observed a strong interference between concurrent reads and writes as well as the impact of physical data layout to parallel I/O performance. Based on these findings, we present a set of case studies in database management systems, a typical data-intensive application. Our case studies show that exploiting internal parallelism is not only the key to enhancing application performance, and more importantly, it also fundamentally changes the equation for optimizing applications. This calls for a careful reconsideration of various aspects in application and system designs. Furthermore, we give a set of experimental studies on new-generation SSDs and the interaction between internal and external parallelism in an SSD-based Redundant Array of Independent Disks (RAID) storage. With these critical findings, we finally make a set of recommendations to system architects and application designers for effectively exploiting internal parallelism.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Basak:2016:SWI, author = "Jayanta Basak and Kushal Wadhwani and Kaladhar Voruganti", title = "Storage Workload Identification", journal = j-TOS, volume = "12", number = "3", pages = "14:1--14:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2818716", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:06 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Storage workload identification is the task of characterizing a workload in a storage system (more specifically, network storage system---NAS or SAN) and matching it with the previously known workloads. We refer to storage workload identification as ``workload identification'' in the rest of this article. Workload identification is an important problem for cloud providers to solve because (1) providers can leverage this information to colocate similar workloads to make the system more predictable and (2) providers can identify workloads and subsequently give guidance to the subscribers as to associated best practices (with respect to configuration) for provisioning those workloads. Historically, people have identified workloads by looking at their read/write ratios, random/sequential ratios, block size, and interarrival frequency. Researchers are well aware that workload characteristics change over time and that one cannot just take a point in time view of a workload, as that will incorrectly characterize workload behavior. Increasingly, manual detection of workload signature is becoming harder because (1) it is difficult for a human to detect a pattern and (2) representing a workload signature by a tuple consisting of average values for each of the signature components leads to a large error.
In this article, we present workload signature detection and a matching algorithm that is able to correctly identify workload signatures and match them with other similar workload signatures. We have tested our algorithm on nine different workloads generated using publicly available traces and on real customer workloads running in the field to show the robustness of our approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Lee:2016:EST, author = "Sungjin Lee and Dongkun Shin and Youngjin Kim and Jihong Kim", title = "Exploiting Sequential and Temporal Localities to Improve Performance of {NAND} Flash-Based {SSDs}", journal = j-TOS, volume = "12", number = "3", pages = "15:1--15:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2905054", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:06 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "NAND flash-based Solid-State Drives (SSDs) are becoming a viable alternative as a secondary storage solution for many computing systems. Since the physical characteristics of NAND flash memory are different from conventional Hard-Disk Drives (HDDs), flash-based SSDs usually employ an intermediate software layer, called a Flash Translation Layer (FTL). The FTL runs several firmware algorithms for logical-to-physical mapping, I/O interleaving, garbage collection, wear-leveling, and so on. These FTL algorithms not only have a great effect on storage performance and lifetime, but also determine hardware cost and data integrity. In general, a hybrid FTL scheme has been widely used in mobile devices because it exhibits high performance and high data integrity at a low hardware cost. 
Recently, a demand-based FTL based on page-level mapping has been rapidly adopted in high-performance SSDs. The demand-based FTL more effectively exploits the device-level parallelism than the hybrid FTL and requires a small amount of memory by keeping only popular mapping entries in DRAM. Because of this caching mechanism, however, the demand-based FTL is not robust enough for power failures and requires extra reads to fetch missing mapping entries from NAND flash. In this article, we propose a new flash translation layer called LAST++. The proposed LAST++ scheme is based on the hybrid FTL, thus it has the inherent benefits of the hybrid FTL, including low resource requirements, strong robustness for power failures, and high read performance. By effectively exploiting the locality of I/O references, LAST++ increases device-level parallelism and reduces garbage collection overheads. This leads to a great improvement of I/O performance and makes it possible to overcome the limitations of the hybrid FTL. Our experimental results show that LAST++ outperforms the demand-based FTL by 27\% for writes and 7\% for reads, on average, while offering higher robustness against sudden power failures. LAST++ also improves write performance by 39\%, on average, over the existing hybrid FTL.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wan:2016:HSF, author = "Jiguang Wan and Peng Xu and Xubin He and Jibin Wang and Junyao Li and Changsheng Xie", title = "{H-Scale}: a Fast Approach to Scale Disk Arrays via Hybrid Stripe Deployment", journal = j-TOS, volume = "12", number = "3", pages = "16:1--16:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2822895", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:06 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "To satisfy the explosive growth of data in large-scale data centers, where redundant arrays of independent disks (RAIDs), especially RAID-5, are widely deployed, effective storage scaling and disk expansion methods are desired. However, a way to reduce the data migration overhead and maintain the reliability of the original RAID are major concerns of storage scaling. To address these problems, we propose a new RAID scaling scheme, H-Scale, to achieve fast RAID scaling via hybrid stripe layouts. H-Scale takes advantage of the loose restriction of stripe structures to choose migrated data and to create hybrid stripe structures. The main advantages of our scheme include: (1) dramatically reducing the data migration overhead and thus speeding up the scaling process, (2) maintaining the original RAID's reliability, (3) balancing the workload among disks after scaling, and (4) providing a general scaling approach for different RAID levels. Our theoretical analysis show that H-Scale outperforms existing scaling solutions in terms of data migration, I/O overheads, and parity update operations. 
Evaluation results on a prototype implementation demonstrate that H-Scale speeds up the online scaling process by up to 60\% under SPC traces, and similar improvements on scaling time and user response time are also achieved by evaluations using standard benchmarks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "16", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Basak:2016:UFL, author = "Jayanta Basak and P. C. Nagesh", title = "A User-Friendly Log Viewer for Storage Systems", journal = j-TOS, volume = "12", number = "3", pages = "17:1--17:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2846101", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:06 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "System log files contains messages emitted from several modules within a system and carries valuable information about the system state such as device status and error conditions and also about the various tasks within the system such as program names, execution path, including function names and parameters, and the task completion status. For customers with remote support, the system collects and transmits these logs to a central enterprise repository, where these are monitored for alerts, problem forecasting, and troubleshooting. Very large log files limit the interpretability for the support engineers. For an expert, a large volume of log messages may not pose any problem; however, an inexperienced person may get flummoxed due to the presence of a large number of log messages. Often it is desired to present the log messages in a comprehensive manner where a person can view the important messages first and then go into details if required. 
In this article, we present a user-friendly log viewer where we first hide the unimportant or inconsequential messages from the log file. A user can then click a particular hidden view and get the details of the hidden messages. Messages with low utility are considered inconsequential as their removal does not impact the end user for the aforesaid purpose such as problem forecasting or troubleshooting. We relate the utility of a message to the probability of its appearance in the due context. We present machine-learning-based techniques that computes the usefulness of individual messages in a log file. We demonstrate identification and discarding of inconsequential messages to shrink the log size to acceptable limits. We have tested this over real-world logs and observed that eliminating such low value data can reduce the log files significantly (30\% to 55\%), with minimal error rates (7\% to 20\%). When limited user feedback is available, we show modifications to the technique to learn the user intent and accordingly further reduce the error.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "17", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Diesburg:2016:TLA, author = "Sarah Diesburg and Christopher Meyers and Mark Stanovich and An-I Andy Wang and Geoff Kuenning", title = "{TrueErase}: Leveraging an Auxiliary Data Path for Per-File Secure Deletion", journal = j-TOS, volume = "12", number = "4", pages = "18:1--18:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2854882", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "One important aspect of privacy is the ability to securely delete sensitive data from electronic storage in such a way that it cannot be recovered; we call this action secure deletion. Short of physically destroying the entire storage medium, existing software secure-deletion solutions tend to be piecemeal at best --- they may only work for one type of storage or file system, may force the user to delete all files instead of selected ones, may require the added complexities of encryption and key storage, may require extensive changes and additions to the computer's operating system or storage firmware, and may not handle system crashes gracefully. We present TrueErase, a holistic secure-deletion framework for individual systems that contain sensitive data. Through design, implementation, verification, and evaluation on both a hard drive and NAND flash, TrueErase shows that it is possible to construct a per-file, secure-deletion framework that can accommodate different storage media and legacy file systems, require limited changes to legacy systems, and handle common crash scenarios. 
TrueErase can serve as a building block by cryptographic systems that securely delete information by erasing encryption keys. The overhead is dependent on spatial locality, number of sensitive files, and workload (computational- or I/O-bound).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "18", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Song:2016:EMM, author = "Nae Young Song and Yongseok Son and Hyuck Han and Heon Young Yeom", title = "Efficient Memory-Mapped {I/O} on Fast Storage Device", journal = j-TOS, volume = "12", number = "4", pages = "19:1--19:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2846100", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "In modern operating systems, memory-mapped I/O (mmio) is an important access method that maps a file or file-like resource to a region of memory. The mapping allows applications to access data from files through memory semantics (i.e., load/store) and it provides ease of programming. The number of applications that use mmio are increasing because memory semantics can provide better performance than file semantics (i.e., read/write). As more data are located in the main memory, the performance of applications can be enhanced owing to the effect of a large cache. When mmio is used, hot data tend to reside in the main memory and cold data are located in storage devices such as HDD and SSD; data placement in the memory hierarchy depends on the virtual memory subsystem of the operating system. Generally, the performance of storage devices has a direct impact on the performance of mmio. It is widely expected that better storage devices will lead to better performance. 
However, the expectation is limited when fast storage devices are used since the virtual memory subsystem does not reflect the performance feature of those devices. In this article, we examine the Linux virtual memory subsystem and mmio path to determine the influence of fast storage on the existing Linux kernel. Throughout our investigation, we find that the overhead of the Linux virtual memory subsystem, negligible on the HDD, prevents applications from using the full performance of fast storage devices. To reduce the overheads and fully exploit the fast storage devices, we present several optimization techniques. We modify the Linux kernel to implement our optimization techniques and evaluate our prototyped system with low-latency storage devices. Experimental results show that our optimized mmio has up to 7x better performance than the original mmio. We also compare our system to a system that has enough memory to keep all data in the main memory. The system with insufficient memory and our mmio achieves 92\% performance of the resource-rich system. This result implies that our virtual memory subsystem for mmap can effectively extend the main memory with fast storage devices.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "19", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Paulo:2016:EDD, author = "Jo{\~a}o Paulo and Jos{\'e} Pereira", title = "Efficient Deduplication in a Distributed Primary Storage Infrastructure", journal = j-TOS, volume = "12", number = "4", pages = "20:1--20:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2876509", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "A large amount of duplicate data typically exists across volumes of virtual machines in cloud computing infrastructures. Deduplication allows reclaiming these duplicates while improving the cost-effectiveness of large-scale multitenant infrastructures. However, traditional archival and backup deduplication systems impose prohibitive storage overhead for virtual machines hosting latency-sensitive applications. Primary deduplication systems reduce such penalty but rely on special cluster filesystems, centralized components, or restrictive workload assumptions. Also, some of these systems reduce storage overhead by confining deduplication to off-peak periods that may be scarce in a cloud environment. We present DEDIS, a dependable and fully decentralized system that performs cluster-wide off-line deduplication of virtual machines' primary volumes. DEDIS works on top of any unsophisticated storage backend, centralized or distributed, as long as it exports a basic shared block device interface. Also, DEDIS does not rely on data locality assumptions and incorporates novel optimizations for reducing deduplication overhead and increasing its reliability. 
The evaluation of an open-source prototype shows that minimal I/O overhead is achievable even when deduplication and intensive storage I/O are executed simultaneously. Also, our design scales out and allows collocating DEDIS components and virtual machines in the same servers, thus, sparing the need of additional hardware.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "20", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Yang:2016:WSZ, author = "Yue Yang and Jianwen Zhu", title = "Write Skew and {Zipf} Distribution: Evidence and Implications", journal = j-TOS, volume = "12", number = "4", pages = "21:1--21:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2908557", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/benfords-law.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Understanding workload characteristics is essential to storage systems design and performance optimization. With the emergence of flash memory as a new viable storage medium, the new design concern of flash endurance arises, necessitating a revisit of workload characteristics, in particular, of the write behavior. Inspired by Web caching studies where a Zipf-like access pattern is commonly found, we hypothesize that write count distribution at the block level may also follow Zipf's Law. To validate this hypothesis, we study 48 block I/O traces collected from a wide variety of real and benchmark applications. Through extensive analysis, we demonstrate that the Zipf-like pattern indeed widely exists in write traffic provided its disguises are removed by statistical processing. 
This finding implies that write skew in a large class of applications could be analytically expressed and, thus, facilitates design tradeoff explorations adaptive to workload characteristics.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "21", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wu:2016:LLD, author = "Suzhen Wu and Bo Mao and Xiaolan Chen and Hong Jiang", title = "{LDM}: Log Disk Mirroring with Improved Performance and Reliability for {SSD}-Based Disk Arrays", journal = j-TOS, volume = "12", number = "4", pages = "22:1--22:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2892639", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "With the explosive growth in data volume, the I/O bottleneck has become an increasingly daunting challenge for big data analytics. Economic forces, driven by the desire to introduce flash-based Solid-State Drives (SSDs) into the high-end storage market, have resulted in hybrid storage systems in the cloud. However, a single flash-based SSD cannot satisfy the performance, reliability, and capacity requirements of enterprise or HPC storage systems in the cloud. While an array of SSDs organized in a RAID structure, such as RAID5, provides the potential for high storage capacity and bandwidth, reliability and performance problems will likely result from the parity update operations. In this article, we propose a Log Disk Mirroring scheme (LDM) to improve the performance and reliability of SSD-based disk arrays. LDM is a hybrid disk array architecture that consists of several SSDs and two hard disk drives (HDDs). In an LDM array, the two HDDs are mirrored as a write buffer that temporally absorbs the small write requests. 
The small and random write data are written on the mirroring buffer by using the logging technique that sequentially appends new data. The small write data are merged and destaged to the SSD-based disk array during the system idle periods. Our prototype implementation of the LDM array and the performance evaluations show that the LDM array significantly outperforms the pure SSD-based disk arrays by a factor of 20.4 on average, and outperforms HPDA by a factor of 5.0 on average. The reliability analysis shows that the MTTDL of the LDM array is 2.7 times and 1.7 times better than that of pure SSD-based disk arrays and HPDA disk arrays.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "22", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Esiner:2016:FFB, author = "Ertem Esiner and Adilet Kachkeev and Samuel Braunfeld and Alptekin K{\"u}p{\c{c}}{\"u} and {\"O}znur {\"O}zkasap", title = "{FlexDPDP}: Flexlist-Based Optimized Dynamic Provable Data Possession", journal = j-TOS, volume = "12", number = "4", pages = "23:1--23:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2943783", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "With increasing popularity of cloud storage, efficiently proving the integrity of data stored on an untrusted server has become significant. Authenticated skip lists and rank-based authenticated skip lists (RBASL) have been used to provide support for provable data update operations in cloud storage. 
However, in a dynamic file scenario, an RBASL based on block indices falls short when updates are not proportional to a fixed block size; such an update to the file, even if small, may result in $ O(n) $ updates on the data structure for a file with n blocks. To overcome this problem, we introduce FlexList, a flexible length-based authenticated skip list. FlexList translates variable-size updates to $ O(\lceil u / B \rceil) $ insertions, removals, or modifications, where u is the size of the update and B is the (average) block size. We further present various optimizations on the four types of skip lists (regular, authenticated, rank-based authenticated, and FlexList). We build such a structure in $ O(n) $ time and parallelize this operation for the first time. We compute one single proof to answer multiple (non)membership queries and obtain efficiency gains of 35\%, 35\%, and 40\% in terms of proof time, energy, and size, respectively. We propose a method of handling multiple updates at once, achieving efficiency gains of up to 60\% at the server side and 90\% at the client side. We also deployed our implementation of FlexDPDP (dynamic provable data possession (DPDP) with FlexList instead of RBASL) on PlanetLab, demonstrating that FlexDPDP performs comparable to the most efficient static storage scheme (provable data possession (PDP)) while providing dynamic data support.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "23", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hall:2016:TPR, author = "Robert J. 
Hall", title = "Tools for Predicting the Reliability of Large-Scale Storage Systems", journal = j-TOS, volume = "12", number = "4", pages = "24:1--24:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2911987", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Data-intensive applications require extreme scaling of their underlying storage systems. Such scaling, together with the fact that storage systems must be implemented in actual data centers, increases the risk of data loss from failures of underlying components. Accurate engineering requires quantitatively predicting reliability, but this remains challenging due to the need to account for extreme scale, redundancy scheme type and strength, distribution architecture, and component dependencies. This article introduces CQSim-R, a tool suite for predicting the reliability of large-scale storage system designs and deployments. CQSim-R includes (a) direct calculations based on an only-drives-fail failure model and (b) an event-based simulator for detailed prediction that handles failures of and failure dependencies among arbitrary (drive or nondrive) components. These are based on a common combinatorial framework for modeling placement strategies. The article demonstrates CQSim-R using models of common storage systems, including replicated and erasure coded designs. New results, such as the poor reliability scaling of spread-placed systems and a quantification of the impact of data center distribution and rack-awareness on reliability, demonstrate the usefulness and generality of the tools. Analysis and empirical studies show the tools' soundness, performance, and scalability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "24", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Brown:2017:ISI, author = "Angela Demke Brown and Florentina Popovici", title = "Introduction to the Special Issue on {USENIX FAST 2016}", journal = j-TOS, volume = "13", number = "1", pages = "1:1--1:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3039209", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Stefanovici:2017:TSS, author = "Ioan Stefanovici and Bianca Schroeder and Greg O'Shea and Eno Thereska", title = "Treating the Storage Stack Like a Network", journal = j-TOS, volume = "13", number = "1", pages = "2:1--2:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3032968", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "In a data center, an IO from an application to distributed storage traverses not only the network but also several software stages with diverse functionality. This set of ordered stages is known as the storage or IO stack. Stages include caches, hypervisors, IO schedulers, file systems, and device drivers. Indeed, in a typical data center, the number of these stages is often larger than the number of network hops to the destination. Yet, while packet routing is fundamental to networks, no notion of IO routing exists on the storage stack. 
The path of an IO to an endpoint is predetermined and hard coded. This forces IO with different needs (e.g., requiring different caching or replica selection) to flow through a one-size-fits-all IO stack structure, resulting in an ossified IO stack. This article proposes sRoute, an architecture that provides a routing abstraction for the storage stack. sRoute comprises a centralized control plane and ``sSwitches'' on the data plane. The control plane sets the forwarding rules in each sSwitch to route IO requests at runtime based on application-specific policies. A key strength of our architecture is that it works with unmodified applications and Virtual Machines (VMs). This article shows significant benefits of customized IO routing to data center tenants: for example, a factor of 10 for tail IO latency, more than 60\% better throughput for a customized replication protocol, a factor of 2 in throughput for customized caching, and enabling live performance debugging in a running system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Yuan:2017:WWR, author = "Jun Yuan and Yang Zhan and William Jannen and Prashant Pandey and Amogh Akshintala and Kanchan Chandnani and Pooja Deo and Zardosht Kasheff and Leif Walsh and Michael A. Bender and Martin Farach-Colton and Rob Johnson and Bradley C. Kuszmaul and Donald E. 
Porter", title = "Writes Wrought Right, and Other Adventures in File System Optimization", journal = j-TOS, volume = "13", number = "1", pages = "3:1--3:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3032969", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "File systems that employ write-optimized dictionaries (WODs) can perform random-writes, metadata updates, and recursive directory traversals orders of magnitude faster than conventional file systems. However, previous WOD-based file systems have not obtained all of these performance gains without sacrificing performance on other operations, such as file deletion, file or directory renaming, or sequential writes. Using three techniques, late-binding journaling, zoning, and range deletion, we show that there is no fundamental trade-off in write-optimization. These dramatic improvements can be retained while matching conventional file systems on all other operations. BetrFS 0.2 delivers order-of-magnitude better performance than conventional file systems on directory scans and small random writes and matches the performance of conventional file systems on rename, delete, and sequential I/O. For example, BetrFS 0.2 performs directory scans $ 2.2 \times $ faster, and small random writes over two orders of magnitude faster, than the fastest conventional file system. But unlike BetrFS 0.1, it renames and deletes files commensurate with conventional file systems and performs large sequential I/O at nearly disk bandwidth. The performance benefits of these techniques extend to applications as well. 
BetrFS 0.2 continues to outperform conventional file systems on many applications, such as rsync, git-diff, and tar, but improves git-clone performance by 35\% over BetrFS 0.1, yielding performance comparable to other file systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Shin:2017:IAT, author = "Ji-Yong Shin and Mahesh Balakrishnan and Tudor Marian and Hakim Weatherspoon", title = "{Isotope}: {ACID} Transactions for Block Storage", journal = j-TOS, volume = "13", number = "1", pages = "4:1--4:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3032967", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Existing storage stacks are top heavy and expect little from block storage. As a result, new high-level storage abstractions-and new designs for existing abstractions-are difficult to realize, requiring developers to implement from scratch complex functionality such as failure atomicity and fine-grained concurrency control. In this article, we argue that pushing transactional isolation into the block store (in addition to atomicity and durability) is both viable and broadly useful, resulting in simpler high-level storage systems that provide strong semantics without sacrificing performance. We present Isotope, a new block store that supports ACID transactions over block reads and writes. Internally, Isotope uses a new multiversion concurrency control protocol that exploits fine-grained, subblock parallelism in workloads and offers both strict serializability and snapshot isolation guarantees. 
We implemented several high-level storage systems over Isotope, including two key-value stores that implement the LevelDB API over a hash table and B-tree, respectively, and a POSIX file system. We show that Isotope's block-level transactions enable systems that are simple (100s of lines of code), robust (i.e., providing ACID guarantees), and fast (e.g., 415MB/s for random file writes). We also show that these systems can be composed using Isotope, providing applications with transactions across different high-level constructs such as files, directories, and key-value pairs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Lu:2017:WSK, author = "Lanyue Lu and Thanumalayan Sankaranarayana Pillai and Hariharan Gopalakrishnan and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau", title = "{WiscKey}: Separating Keys from Values in {SSD}-Conscious Storage", journal = j-TOS, volume = "13", number = "1", pages = "5:1--5:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3033273", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "We present WiscKey, a persistent LSM-tree-based key-value store with a performance-oriented data layout that separates keys from values to minimize I/O amplification. The design of WiscKey is highly SSD optimized, leveraging both the sequential and random performance characteristics of the device. We demonstrate the advantages of WiscKey with both microbenchmarks and YCSB workloads. 
Microbenchmark results show that WiscKey is $ 2.5 \times $ to $ 111 \times $ faster than LevelDB for loading a database (with significantly better tail latencies) and $ 1.6 \times $ to $ 14 \times $ faster for random lookups. WiscKey is faster than both LevelDB and RocksDB in all six YCSB workloads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Li:2017:CSN, author = "Ning Li and Hong Jiang and Dan Feng and Zhan Shi", title = "Customizable {SLO} and Its Near-Precise Enforcement for Storage Bandwidth", journal = j-TOS, volume = "13", number = "1", pages = "6:1--6:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2998454", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Cloud service is being adopted as a utility for large numbers of tenants by renting Virtual Machines (VMs). But for cloud storage, unpredictable IO characteristics make accurate Service-Level-Objective (SLO) enforcement challenging. As a result, it has been very difficult to support simple-to-use and technology-agnostic SLO specifying a particular value for a specific metric (e.g., storage bandwidth). This is because the quality of SLO enforcement depends on performance error and fluctuation that measure the precision of SLO enforcement. High precision of SLO enforcement is critical for user-oriented performance customization and user experiences. To address this challenge, this article presents V-Cup, a framework for VM-oriented customizable SLO and its near-precise enforcement. 
It consists of multiple auto-tuners, each of which exports an interface for a tenant to customize the desired storage bandwidth for a VM and enable the storage bandwidth of the VM to converge on the target value with a predictable precision. We design and implement V-Cup in the Xen hypervisor based on the fair sharing scheduler for VM-level resource management. Our V-Cup prototype evaluation shows that it achieves satisfying performance guarantees through near-precise SLO enforcement.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Qi:2017:CLN, author = "Shigui Qi and Dan Feng and Nan Su and Linjun Mei and Jingning Liu", title = "{CDF--LDPC}: a New Error Correction Method for {SSD} to Improve the Read Performance", journal = j-TOS, volume = "13", number = "1", pages = "7:1--7:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3017430", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The raw error rate of a Solid-State drive (SSD) increases gradually with the increase of Program/Erase (P/E) cycles, retention time, and read cycles. Traditional approaches often use Error Correction Code (ECC) to ensure the reliability of SSDs. For error-free flash memory pages, time costs spent on ECC are redundant and make read performance suboptimal. This article presents a CRC-Detect-First LDPC (CDF-LDPC) algorithm to optimize the read performance of SSDs. The basic idea is to bypass Low-Density Parity-Check (LDPC) decoding of error-free flash memory pages, which can be found using a Cyclic Redundancy Check (CRC) code. Thus, error-free pages can be read directly without sacrificing the reliability of SSDs. 
Experimental results show that the read performance is improved more than 50\% compared with traditional approaches. In particular, when idle time of benchmarks and SSD parallelism are exploited, CDF-LDPC can be performed more efficiently. In this case, the read performance of SSDs can be improved up to about 80\% compared to that of the state of the art.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Joo:2017:ERI, author = "Yongsoo Joo and Sangsoo Park and Hyokyung Bahn", title = "Exploiting {I/O} Reordering and {I/O} Interleaving to Improve Application Launch Performance", journal = j-TOS, volume = "13", number = "1", pages = "8:1--8:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3024094", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Application prefetchers improve application launch performance through either I/O reordering or I/O interleaving. However, there has been no proposal to combine the two techniques together, missing the opportunity for further optimization. We present a new application prefetching technique to take advantage of both the approaches. We evaluated our method with a set of applications to demonstrate that it reduces cold start application launch time by 50\%, which is an improvement of 22\% from the I/O reordering technique.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Qin:2017:DIR, author = "Chuan Qin and Jingwei Li and Patrick P. C. 
Lee", title = "The Design and Implementation of a Rekeying-Aware Encrypted Deduplication Storage System", journal = j-TOS, volume = "13", number = "1", pages = "9:1--9:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3032966", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Mar 25 07:00:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Rekeying refers to an operation of replacing an existing key with a new key for encryption. It renews security protection to protect against key compromise and enable dynamic access control in cryptographic storage. However, it is non-trivial to realize efficient rekeying in encrypted deduplication storage systems, which use deterministic content-derived encryption keys to allow deduplication on ciphertexts. We design and implement a rekeying-aware encrypted deduplication (REED) storage system. REED builds on a deterministic version of all-or-nothing transform, such that it enables secure and lightweight rekeying, while preserving the deduplication capability. We propose two REED encryption schemes that trade between performance and security and extend REED for dynamic access control. We implement a REED prototype with various performance optimization techniques and demonstrate how we can exploit similarity to mitigate key generation overhead. Our trace-driven testbed evaluation shows that our REED prototype maintains high performance and storage efficiency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Maltzahn:2017:ISI, author = "Carlos Maltzahn and Vasily Tarasov", title = "Introduction to the Special Issue on {MSST 2016}", journal = j-TOS, volume = "13", number = "2", pages = "10:1--10:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3078405", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Jun 10 16:10:47 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Ma:2017:LED, author = "Jingwei Ma and Rebecca J. Stones and Yuxiang Ma and Jingui Wang and Junjie Ren and Gang Wang and Xiaoguang Liu", title = "Lazy Exact Deduplication", journal = j-TOS, volume = "13", number = "2", pages = "11:1--11:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3078837", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Jun 10 16:10:47 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Deduplication aims to reduce duplicate data in storage systems by removing redundant copies of data blocks, which are compared to one another using fingerprints. However, repeated on-disk fingerprint lookups lead to high disk traffic, which results in a bottleneck. In this article, we propose a ``lazy'' data deduplication method, which buffers incoming fingerprints that are used to perform on-disk lookups in batches, with the aim of improving subsequent prefetching. 
In deduplication in general, prefetching is used to improve the cache hit rate by exploiting locality within the incoming fingerprint stream. For lazy deduplication, we design a buffering strategy that preserves locality in order to facilitate prefetching. Furthermore, as the proportion of deduplication time spent on I/O decreases, the proportion spent on fingerprint calculation and chunking increases. Thus, we also utilize parallel approaches (utilizing multiple CPU cores and a graphics processing unit) to further improve the overall performance. Experimental results indicate that the lazy method improves fingerprint identification performance by over 50\% compared with an ``eager'' method with the same data layout. The GPU improves the hash calculation by a factor of 4.6 and multithreaded chunking by a factor of 4.16. Deduplication performance can be improved by over 45\% on SSD and 80\% on HDD in the last round on the real datasets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Lee:2017:RWA, author = "Eunji Lee and Julie Kim and Hyokyung Bahn and Sunjin Lee and Sam H. Noh", title = "Reducing Write Amplification of Flash Storage through Cooperative Data Management with {NVM}", journal = j-TOS, volume = "13", number = "2", pages = "12:1--12:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3060146", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Jun 10 16:10:47 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Write amplification is a critical factor that limits the stable performance of flash-based storage systems. To reduce write amplification, this article presents a new technique that cooperatively manages data in flash storage and nonvolatile memory (NVM). 
Our scheme basically considers NVM as the cache of flash storage, but allows the original data in flash storage to be invalidated if there is a cached copy in NVM, which can temporarily serve as the original data. This scheme eliminates the copy-out operation for a substantial number of cached data, thereby enhancing garbage collection efficiency. Simulated results show that the proposed scheme reduces the copy-out overhead of garbage collection by 51.4\% and decreases the standard deviation of response time by 35.4\% on average. Measurement results obtained by implementing the proposed scheme in BlueDBM,$^1$ an open-source flash development platform developed by MIT, show that the proposed scheme reduces the execution time and increases IOPS by 2--21\% and 3--18\%, respectively, for the workloads that we considered. This article is an extended version of Lee et al. [2016], which was presented at the 32nd International Conference on Massive Data Storage Systems and Technology in 2016.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Chen:2017:OFS, author = "Cheng Chen and Jun Yang and Qingsong Wei and Chundong Wang and Mingdi Xue", title = "Optimizing File Systems with Fine-grained Metadata Journaling on Byte-addressable {NVM}", journal = j-TOS, volume = "13", number = "2", pages = "13:1--13:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3060147", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Jun 10 16:10:47 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Journaling file systems have been widely adopted to support applications that demand data consistency. 
However, we observed that the overhead of journaling can cause up to 48.2\% performance drop under certain kinds of workloads. On the other hand, the emerging high-performance, byte-addressable Non-volatile Memory (NVM) has the potential to minimize such overhead by being used as the journal device. The traditional journaling mechanism based on block devices is nevertheless unsuitable for NVM due to the write amplification of metadata journal we observed. In this article, we propose a fine-grained metadata journal mechanism to fully utilize the low-latency byte-addressable NVM so that the overhead of journaling can be significantly reduced. Based on the observation that conventional block-based metadata journal contains up to 90\% clean metadata that is unnecessary to be journalled, we design a fine-grained journal format for byte-addressable NVM which contains only modified metadata. Moreover, we redesign the process of transaction committing, checkpointing, and recovery in journaling file systems utilizing the new journal format. Therefore, thanks to the reduced amount of ordered writes for journals, the overhead of journaling can be reduced without compromising the file system consistency. To evaluate our fine-grained metadata journaling mechanism, we have implemented a journaling file system prototype based on Ext4 and JBD2 in Linux. Experimental results show that our NVM-based fine-grained metadata journaling is up to 15.8 $ \times $ faster than the traditional approach under FileBench workloads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zhou:2017:UAI, author = "You Zhou and Fei Wu and Ping Huang and Xubin He and Changsheng Xie and Jian Zhou", title = "Understanding and Alleviating the Impact of the Flash Address Translation on Solid State Devices", journal = j-TOS, volume = "13", number = "2", pages = "14:1--14:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3051123", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Jun 10 16:10:47 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Flash-based solid state devices (SSDs) have been widely employed in consumer and enterprise storage systems. However, the increasing SSD capacity imposes great pressure on performing efficient logical to physical address translation in a page-level flash translation layer (FTL). Existing schemes usually employ a built-in RAM to store mapping information, called mapping cache, to speed up the address translation. Since only a fraction of the mapping table can be cached due to limited cache space, a large number of extra flash accesses are required for cache management and garbage collection, degrading the performance and lifetime of an SSD. In this paper, we first apply analytical models to investigate the key factors that incur extra flash accesses during address translation. Then, we propose a novel page-level FTL with an efficient translation page-level caching mechanism, named TPFTL, to minimize the extra flash accesses. TPFTL employs a two-level least recently used (LRU) list with space-efficient optimizations to organize cached mapping entries. 
Inspired by the models, we further design a workload-adaptive loading policy combined with an efficient replacement policy to increase the cache hit rate and reduce the writebacks of replaced dirty entries. Finally, we evaluate TPFTL using extensive trace-driven simulations. Our evaluation results show that compared to the state-of-the-art FTLs, TPFTL significantly reduces the extra operations caused by address translation, achieving reductions on system response time and write amplification by up to 27.1\% and 32.2\%, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Liu:2017:HPG, author = "Qing Liu and Dan Feng and Yuchong Hu and Zhan Shi and Min Fu", title = "High-Performance General Functional Regenerating Codes with Near-Optimal Repair Bandwidth", journal = j-TOS, volume = "13", number = "2", pages = "15:1--15:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3051122", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Jun 10 16:10:47 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Erasure codes are widely used in modern distributed storage systems to prevent data loss and server failures. Regenerating codes are a class of erasure codes that trade storage efficiency and computation for repair bandwidth reduction. However, their nonunified coding parameters and huge computational overhead prohibit their applications. Hence, we first propose a family of General Functional Regenerating (GFR) codes with uncoded repair, balancing storage efficiency and repair bandwidth with general parameters. The GFR codes take advantage of a heuristic repair algorithm, which makes efforts to employ as little repair bandwidth as possible to repair a single failure. 
Second, we also present a scheduled shift multiplication (SSM) algorithm, which accelerates the matrix product over the Galois field by scheduling the order of coding operations, so encoding and repairing of GFR codes can be executed by fast bitwise shifting and exclusive-OR. Compared to the traditional table-lookup multiplication algorithm, our SSM algorithm gains a $ 1.2 \times $ to $ 2 \times $ speedup in our experimental evaluations, with little effect on the repair success rate.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hou:2017:UPB, author = "Binbing Hou and Feng Chen and Zhonghong Ou and Ren Wang and Michael Mesnier", title = "Understanding {I/O} Performance Behaviors of Cloud Storage from a Client's Perspective", journal = j-TOS, volume = "13", number = "2", pages = "16:1--16:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3078838", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Jun 10 16:10:47 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Cloud storage has gained increasing popularity in the past few years. In cloud storage, data is stored in the service provider's data centers, and users access data via the network. For such a new storage model, our prior wisdom about conventional storage may not remain valid nor applicable to the emerging cloud storage. In this article, we present a comprehensive study to gain insight into the unique characteristics of cloud storage and optimize user experiences with cloud storage from a client's perspective. Unlike prior measurement work that mostly aims to characterize cloud storage providers or specific client applications, we focus on analyzing the effects of various client-side factors on the user-experienced performance. 
Through extensive experiments and quantitative analysis, we have obtained several important findings. For example, we find that (1) a proper combination of parallelism and request size can achieve optimized bandwidths, (2) a client's capabilities and geographical location play an important role in determining the end-to-end user-perceivable performance, and (3) the interference among mixed cloud storage requests may cause performance degradation. Based on our findings, we showcase a sampling- and inference-based method to determine a proper combination for different optimization goals. We further present a set of case studies on client-side chunking and parallelization for typical cloud-based applications. Our studies show that specific attention should be paid to fully exploiting the capabilities of clients and the great potential of cloud storage services.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "16", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Iliadis:2017:EEQ, author = "Ilias Iliadis and Jens Jelitto and Yusik Kim and Slavisa Sarafijanovic and Vinodh Venkatesan", title = "{ExaPlan}: Efficient Queueing-Based Data Placement, Provisioning, and Load Balancing for Large Tiered Storage Systems", journal = j-TOS, volume = "13", number = "2", pages = "17:1--17:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3078839", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Jun 10 16:10:47 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tos/; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Multi-tiered storage, where each tier consists of one type of storage device (e.g., SSD, HDD, or disk arrays), is a commonly used approach to achieve both high performance and cost efficiency in large-scale systems that need to store data with vastly different access characteristics. 
By aligning the access characteristics of the data, either fixed-sized extents or variable-sized files, to the characteristics of the storage devices, a higher performance can be achieved for any given cost. This article presents ExaPlan, a method to determine both the data-to-tier assignment and the number of devices in each tier that minimize the system's mean response time for a given budget and workload. In contrast to other methods that constrain or minimize the system load, ExaPlan directly minimizes the system's mean response time estimated by a queueing model. Minimizing the mean response time is typically intractable as the resulting optimization problem is both nonconvex and combinatorial in nature. ExaPlan circumvents this intractability by introducing a parameterized data placement approach that makes it a highly scalable method that can be easily applied to exascale systems. Through experiments that use parameters from real-world storage systems, such as CERN and LOFAR, it is demonstrated that ExaPlan provides solutions that yield lower mean response times than previous works. It supports standalone SSDs and HDDs as well as disk arrays as storage tiers, and although it uses a static workload representation, we provide empirical evidence that underlying dynamic workloads have invariant properties that can be deemed static for the purpose of provisioning a storage system. ExaPlan is also effective as a load-balancing tool used for placing data across devices within a tier, resulting in an up to 3.6-fold reduction of response time compared with a traditional load-balancing algorithm, such as the Longest Processing Time heuristic.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "17", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Kuenning:2017:ISI, author = "Geoff Kuenning and Carl Waldspurger", title = "Introduction to the Special Issue on {USENIX FAST 2017}", journal = j-TOS, volume = "13", number = "3", pages = "18:1--18:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3131620", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Oct 30 08:04:10 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "18", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Pillai:2017:ACC, author = "Thanumalayan Sankaranarayana Pillai and Ramnatthan Alagappan and Lanyue Lu and Vijay Chidambaram and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau", title = "Application Crash Consistency and Performance with {CCFS}", journal = j-TOS, volume = "13", number = "3", pages = "19:1--19:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3119897", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Oct 30 08:04:10 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Recent research has shown that applications often incorrectly implement crash consistency. We present the Crash-Consistent File System (ccfs), a file system that improves the correctness of application-level crash consistency protocols while maintaining high performance. A key idea in ccfs is the abstraction of a stream. Within a stream, updates are committed in program order, improving correctness; across streams, there are no ordering restrictions, enabling scheduling flexibility and high performance. 
We empirically demonstrate that applications running atop ccfs achieve high levels of crash consistency. Further, we show that ccfs performance under standard file-system benchmarks is excellent, in the worst case on par with the highest performing modes of Linux ext4, and in some cases notably better. Overall, we demonstrate that both application correctness and high performance can be realized in a modern file system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "19", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Ganesan:2017:RDI, author = "Aishwarya Ganesan and Ramnatthan Alagappan and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau", title = "Redundancy Does Not Imply Fault Tolerance: Analysis of Distributed Storage Reactions to File-System Faults", journal = j-TOS, volume = "13", number = "3", pages = "20:1--20:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3125497", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Oct 30 08:04:10 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "We analyze how modern distributed storage systems behave in the presence of file-system faults such as data corruption and read and write errors. We characterize eight popular distributed storage systems and uncover numerous problems related to file-system fault tolerance. We find that modern distributed systems do not consistently use redundancy to recover from file-system faults: a single file-system fault can cause catastrophic outcomes such as data loss, corruption, and unavailability. We also find that the above outcomes arise due to fundamental problems in file-system fault handling that are common across many systems. 
Our results have implications for the design of next-generation fault-tolerant distributed and cloud storage systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "20", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Chen:2017:VMN, author = "Ming Chen and Geetika Babu Bangera and Dean Hildebrand and Farhaan Jalia and Geoff Kuenning and Henry Nelson and Erez Zadok", title = "{vNFS}: Maximizing {NFS} Performance with Compounds and Vectorized {I/O}", journal = j-TOS, volume = "13", number = "3", pages = "21:1--21:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3116213", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Oct 30 08:04:10 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Modern systems use networks extensively, accessing both services and storage across local and remote networks. Latency is a key performance challenge, and packing multiple small operations into fewer large ones is an effective way to amortize that cost, especially after years of significant improvement in bandwidth but not latency. To this end, the NFSv4 protocol supports a compounding feature to combine multiple operations. Yet compounding has been underused since its conception because the synchronous POSIX file-system API issues only one (small) request at a time. We propose vNFS, an NFSv4.1-compliant client that exposes a vectorized high-level API and leverages NFS compound procedures to maximize performance. We designed and implemented vNFS as a user-space RPC library that supports an assortment of bulk operations on multiple files and directories. We found it easy to modify several UNIX utilities, an HTTP/2 server, and Filebench to use vNFS. 
We evaluated vNFS under a wide range of workloads and network latency conditions, showing that vNFS improves performance even for low-latency networks. On high-latency networks, vNFS can improve performance by as much as two orders of magnitude.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "21", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Yan:2017:TTF, author = "Shiqin Yan and Huaicheng Li and Mingzhe Hao and Michael Hao Tong and Swaminathan Sundararaman and Andrew A. Chien and Haryadi S. Gunawi", title = "Tiny-Tail Flash: Near-Perfect Elimination of Garbage Collection Tail Latencies in {NAND SSDs}", journal = j-TOS, volume = "13", number = "3", pages = "22:1--22:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3121133", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Oct 30 08:04:10 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Flash storage has become the mainstream destination for storage users. However, SSDs do not always deliver the performance that users expect. The core culprit of flash performance instability is the well-known garbage collection (GC) process, which causes long delays as the SSD cannot serve (blocks) incoming I/Os, which then induces the long tail latency problem. We present ttFlash as a solution to this problem. ttFlash is a ``tiny-tail'' flash drive (SSD) that eliminates GC-induced tail latencies by circumventing GC-blocked I/Os with four novel strategies: plane-blocking GC, rotating GC, GC-tolerant read, and GC-tolerant flush. These four strategies leverage the timely combination of modern SSD internal technologies such as powerful controllers, parity-based redundancies, and capacitor-backed RAM. Our strategies are dependent on the use of intra-plane copyback operations. 
Through an extensive evaluation, we show that ttFlash comes significantly close to a ``no-GC'' scenario. Specifically, between the 99th and 99.99th percentiles, ttFlash is only 1.0 to 2.6$ \times $ slower than the no-GC case, while a base approach suffers from 5--138$ \times $ GC-induced slowdowns.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "22", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Kesavan:2017:EFS, author = "Ram Kesavan and Rohit Singh and Travis Grusecki and Yuvraj Patel", title = "Efficient Free Space Reclamation in {WAFL}", journal = j-TOS, volume = "13", number = "3", pages = "23:1--23:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3125647", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Oct 30 08:04:10 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "NetApp$^\reg $ WAFL$^\reg $ is a transactional file system that uses the copy-on-write mechanism to support fast write performance and efficient snapshot creation. However, copy-on-write increases the demand on the file system to find free blocks quickly, which makes rapid free space reclamation essential. Inability to find free blocks quickly may impede allocations for incoming writes. Efficiency is also important, because the task of reclaiming free space may consume CPU and other resources at the expense of client operations. In this article, we describe the evolution (over more than a decade) of the WAFL algorithms and data structures for reclaiming space with minimal impact to the overall performance of the storage appliance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "23", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Li:2017:PDA, author = "Cheng Li and Philip Shilane and Fred Douglis and Grant Wallace", title = "{Pannier}: Design and Analysis of a Container-Based Flash Cache for Compound Objects", journal = j-TOS, volume = "13", number = "3", pages = "24:1--24:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3094785", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Oct 30 08:04:10 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Classic caching algorithms leverage recency, access count, and/or other properties of cached blocks at per-block granularity. However, for media such as flash which have performance and wear penalties for small overwrites, implementing cache policies at a larger granularity is beneficial. Recent research has focused on buffering small blocks and writing in large granularities, sometimes called containers, but it has not explored the ramifications and best strategies for caching compound blocks consisting of logically distinct, but physically co-located, blocks. Containers may have highly diverse blocks, with mixtures of frequently accessed, infrequently accessed, and invalidated blocks. We propose and evaluate Pannier, a flash cache layer that provides high performance while extending flash lifespan. Pannier uses three main techniques: (1) leveraging block access counts to manage cache containers, (2) incorporating block liveness as a property to improve flash cache space efficiency, and (3) designing a multi-step feedback controller to ensure a flash cache reaches its desired lifespan while maintaining performance. Our evaluation shows that Pannier improves flash cache performance and extends lifespan beyond previous per-block and container-aware caching policies. 
More fundamentally, our investigation highlights the importance of creating new policies for caching compound blocks in flash.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "24", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Chen:2017:EAM, author = "Haibo Chen and Heng Zhang and Mingkai Dong and Zhaoguo Wang and Yubin Xia and Haibing Guan and Binyu Zang", title = "Efficient and Available In-Memory {KV}-Store with Hybrid Erasure Coding and Replication", journal = j-TOS, volume = "13", number = "3", pages = "25:1--25:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3129900", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Oct 30 08:04:10 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "In-memory key/value store (KV-store) is a key building block for many systems like databases and large websites. Two key requirements for such systems are efficiency and availability, which demand a KV-store to continuously handle millions of requests per second. A common approach to availability is using replication, such as primary-backup (PBR), which, however, requires $ M + 1$ times memory to tolerate $M$ failures. This renders scarce memory unable to handle useful user jobs. This article makes the first case of building highly available in-memory KV-store by integrating erasure coding to achieve memory efficiency, while not notably degrading performance. A main challenge is that an in-memory KV-store has much scattered metadata. A single KV put may cause excessive coding operations and parity updates due to excessive small updates to metadata. Our approach, namely Cocytus, addresses this challenge by using a hybrid scheme that leverages PBR for small-sized and scattered data (e.g., metadata and key), while only applying erasure coding to relatively large data (e.g., value).
To mitigate well-known issues like lengthy recovery of erasure coding, Cocytus uses an online recovery scheme by leveraging the replicated metadata information to continuously serve KV requests. To further demonstrate the usefulness of Cocytus, we have built a transaction layer by using Cocytus as a fast and reliable storage layer to store database records and transaction logs. We have integrated the design of Cocytus to Memcached and extend it to support in-memory transactions. Evaluation using YCSB with different KV configurations shows that Cocytus incurs low overhead for latency and throughput, can tolerate node failures with fast online recovery, while saving 33\% to 46\% memory compared to PBR when tolerating two failures. A further evaluation using the SmallBank OLTP benchmark shows that in-memory transactions can run atop Cocytus with high throughput, low latency, and low abort rate and recover fast from consecutive failures.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "25", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Liu:2017:SEC, author = "Qing Liu and Dan Feng and Hong Jiang and Yuchong Hu and Tianfeng Jiao", title = "Systematic Erasure Codes with Optimal Repair Bandwidth and Storage", journal = j-TOS, volume = "13", number = "3", pages = "26:1--26:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3109479", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Oct 30 08:04:10 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Erasure codes are widely used in distributed storage systems to prevent data loss. Traditional codes suffer from a typical repair-bandwidth problem in which the amount of data required to reconstruct the lost data, referred to as the repair bandwidth, is often far more than the theoretical minimum. 
While many novel codes have been proposed in recent years to reduce the repair bandwidth, these codes either require extra storage and computation overhead or are only applicable to some special cases. To address the weaknesses of the existing solutions to the repair-bandwidth problem, we propose Z Codes, a general family of codes capable of achieving the theoretical lower bound of repair bandwidth versus storage. To the best of our knowledge, the Z codes are the first general systematic erasure codes that jointly achieve optimal repair bandwidth and storage. Further, we generalize the Z codes to the GZ codes to gain the Maximum Distance Separable (MDS) property. Our evaluations of a real system indicate that Z/GZ and Reed--Solomon (RS) codes show approximately close encoding and repairing speeds, while GZ codes achieve over 37.5\% response time reduction for repairing the same size of data, compared to the RS and Cauchy Reed--Solomon (CRS) codes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "26", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Viotti:2017:HRH, author = "Paolo Viotti and Dan Dobre and Marko Vukoli{\'c}", title = "{Hybris}: Robust Hybrid Cloud Storage", journal = j-TOS, volume = "13", number = "3", pages = "27:1--27:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3119896", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Oct 30 08:04:10 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Besides well-known benefits, commodity cloud storage also raises concerns that include security, reliability, and consistency. We present Hybris key-value store, the first robust hybrid cloud storage system, aiming at addressing these concerns leveraging both private and public cloud resources. 
Hybris robustly replicates metadata on trusted private premises (private cloud), separately from data, which are dispersed (using replication or erasure coding) across multiple untrusted public clouds. Hybris maintains metadata stored on private premises at the order of few dozens of bytes per key, avoiding the scalability bottleneck at the private cloud. In turn, the hybrid design allows Hybris to efficiently and robustly tolerate cloud outages but also potential malice in clouds without overhead. Namely, to tolerate up to $f$ malicious clouds, in the common case of the Hybris variant with data replication, writes replicate data across $ f + 1$ clouds, whereas reads involve a single cloud. In the worst case, only up to $f$ additional clouds are used. This is considerably better than earlier multi-cloud storage systems that required costly $ 3 f + 1$ clouds to mask $f$ potentially malicious clouds. Finally, Hybris leverages strong metadata consistency to guarantee to Hybris applications strong data consistency without any modifications to the eventually consistent public clouds. We implemented Hybris in Java and evaluated it using a series of micro and macro-benchmarks. Our results show that Hybris significantly outperforms comparable multi-cloud storage systems and approaches the performance of bare-bone commodity public cloud storage.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "27", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Parker-Wood:2017:ISI, author = "Aleatha Parker-Wood and Thomas Schwarz", title = "Introduction to the {Special Issue on Massive Storage Systems and Technology 2017}", journal = j-TOS, volume = "13", number = "4", pages = "28:1--28:??", month = dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3148596", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Dec 22 18:16:19 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "28", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Yao:2017:BEK, author = "Ting Yao and Jiguang Wan and Ping Huang and Xubin He and Fei Wu and Changsheng Xie", title = "Building Efficient Key--Value Stores via a Lightweight Compaction Tree", journal = j-TOS, volume = "13", number = "4", pages = "29:1--29:??", month = dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3139922", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Dec 22 18:16:19 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Log-Structure Merge tree (LSM-tree) has been one of the mainstream indexes in key-value systems supporting a variety of write-intensive Internet applications in today's data centers. However, the performance of LSM-tree is seriously hampered by constantly occurring compaction procedures, which incur significant write amplification and degrade the write throughput. To alleviate the performance degradation caused by compactions, we introduce a lightweight compaction tree (LWC-tree), a variant of LSM-tree index optimized for minimizing the write amplification and maximizing the system throughput. 
The lightweight compaction drastically decreases write amplification by appending data in a table and only merging the metadata that have much smaller size. Using our proposed LWC-tree, we have implemented three key-value LWC-stores on different storage mediums including Shingled Magnetic Recording (SMR) drives, Solid State Drives (SSD), and conventional Hard Disk Drives (HDDs). The LWC-store is particularly optimized for SMR drives, as it eliminates the multiplicative I/O amplification from both LSM-trees and SMR drives. Due to the lightweight compaction procedure, LWC-store reduces the write amplification by a factor of up to 5$ \times $ compared to the popular LevelDB key-value store. Moreover, the random write throughput of the LWC-tree on SMR drives is significantly improved by up to 467\% even compared with LevelDB on conventional HDDs. Furthermore, LWC-tree has wide applicability and delivers impressive performance improvement in various conditions, including different storage mediums (i.e., SMR, HDD, SSD) and various value sizes and access patterns (i.e., uniform and Zipfian).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "29", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Liu:2017:OWL, author = "Qingyue Liu and Peter Varman", title = "{Ouroboros} Wear Leveling for {NVRAM} Using Hierarchical Block Migration", journal = j-TOS, volume = "13", number = "4", pages = "30:1--30:??", month = dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3139530", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Dec 22 18:16:19 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Emerging nonvolatile RAM (NVRAM) technologies have a limit on the number of writes that can be made to any cell, similar to the erasure limits in NAND Flash. 
This motivates the need for wear leveling techniques to distribute the writes evenly among the cells. Unlike NAND Flash, cells in NVRAM can be rewritten without the need for erasing the entire containing block, avoiding the issues of space reclamation and garbage collection, motivating alternate approaches to the problem. In this article, we propose a hierarchical wear-leveling model called Ouroboros wear leveling. Ouroboros uses a two-level strategy whereby frequent low-cost intraregion wear leveling at small granularity is combined with interregion wear leveling at a larger time interval and granularity. Ouroboros is a hybrid migration scheme that exploits correct demand predictions in making better wear-leveling decisions while using randomization to avoid wear-leveling attacks by deterministic access patterns. We also propose a way to optimize wear-leveling parameter settings to meet a target smoothness level under limited time and space overhead constraints for different memory architectures and trace characteristics. Several experiments are performed on synthetically generated memory traces with special characteristics, two block-level storage traces, and two memory-line-level memory traces. The results show that Ouroboros wear leveling can distribute writes smoothly across the whole NVRAM with no more than 0.2\% space overhead and 0.52\% time overhead for a 512GB memory.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "30", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Yadgar:2017:ETY, author = "Gala Yadgar and Roman Shor", title = "Experience from Two Years of Visualizing Flash with {SSDPlayer}", journal = j-TOS, volume = "13", number = "4", pages = "31:1--31:??", month = dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3149356", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Dec 22 18:16:19 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Data visualization is a thriving field of computer science, with widespread impact on diverse scientific disciplines, from medicine and meteorology to visual data mining. Advances in large-scale storage systems, as well as low-level storage technology, played a significant role in accelerating the applicability and adoption of modern visualization techniques. Ironically, ``the cobbler's children have no shoes'': Researchers who wish to analyze storage systems and devices are usually limited to a variety of static histograms and basic displays. The dynamic nature of data movement on flash has motivated the introduction of SSDPlayer, a graphical tool for visualizing the various processes that cause data movement on solid-state drives (SSDs). In 2015, we used the initial version of SSDPlayer to demonstrate how visualization can assist researchers and developers in their understanding of modern, complex flash-based systems. While we continued to use SSDPlayer for analysis purposes, we found it extremely useful for education and presentation purposes as well. In this article, we describe our experience from two years of using, sharing, and extending SSDPlayer and how similar techniques can further advance storage systems research and education.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "31", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Kim:2017:SSU, author = "Dongjin Kim and Kyu Ho Park and Chan-Hyun Youn", title = "{SUPA}: a Single Unified Read-Write Buffer and Pattern-Change-Aware {FTL} for the High Performance of Multi-Channel {SSD}", journal = j-TOS, volume = "13", number = "4", pages = "32:1--32:??", month = dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3129901", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Dec 22 18:16:19 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "To design the write buffer and flash translation layer (FTL) for a solid-state drive (SSD), previous studies have tried to increase overall SSD performance by parallel I/O and garbage collection overhead reduction. Recent works have proposed pattern-based managements, which uses the request size and read- or write-intensiveness to apply different policies to each type of data. In our observation, the locations of read and write requests are closely related, and the pattern of each type of data can be changed. In this work, we propose SUPA, a single unified read-write buffer and pattern-change-aware FTL on multi-channel SSD architecture. To increase both read and write hit ratios on the buffer based on locality, we use a single unified read-write buffer for both clean and dirty blocks. With proposed buffer, we can increase buffer hit ratio up to 8.0\% and reduce 33.6\% and 7.5\% of read and write latencies, respectively. To handle pattern-changed blocks, we add a pattern handler between the buffer and the FTL, which monitors channel status and handles data by applying one of the two different policies according to the pattern changes. With pattern change handling process, we can reduce 1.0\% and 15.4\% of read and write latencies, respectively. 
In total, our evaluations show that SUPA can get up to 2.0 and 3.9 times less read and write latency, respectively, without loss of lifetime in comparison to previous works.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "32", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hu:2017:ORL, author = "Yuchong Hu and Xiaolu Li and Mi Zhang and Patrick P. C. Lee and Xiaoyang Zhang and Pan Zhou and Dan Feng", title = "Optimal Repair Layering for Erasure-Coded Data Centers: From Theory to Practice", journal = j-TOS, volume = "13", number = "4", pages = "33:1--33:??", month = dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3149349", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Dec 22 18:16:19 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Repair performance in hierarchical data centers is often bottlenecked by cross-rack network transfer. Recent theoretical results show that the cross-rack repair traffic can be minimized through repair layering, whose idea is to partition a repair operation into inner-rack and cross-rack layers. However, how repair layering should be implemented and deployed in practice remains an open issue. In this article, we address this issue by proposing a practical repair layering framework called DoubleR. We design two families of practical double regenerating codes (DRC), which not only minimize the cross-rack repair traffic but also have several practical properties that improve state-of-the-art regenerating codes. We implement and deploy DoubleR atop the Hadoop Distributed File System (HDFS) and show that DoubleR maintains the theoretical guarantees of DRC and improves the repair performance of regenerating codes in both node recovery and degraded read operations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "33", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zeng:2017:CCS, author = "Lingfang Zeng and Zehao Zhang and Yang Wang and Dan Feng and Kenneth B. Kent", title = "{CosaFS}: a Cooperative Shingle-Aware File System", journal = j-TOS, volume = "13", number = "4", pages = "34:1--34:??", month = dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3149482", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Dec 22 18:16:19 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "In this article, we design and implement a cooperative shingle-aware file system, called CosaFS, on heterogeneous storage devices that mix solid-state drives (SSDs) and shingled magnetic recording (SMR) technology to improve the overall performance of storage systems. The basic idea of CosaFS is to classify objects as hot or cold objects based on a proposed Lookahead with Recency Weight scheme. If an object is identified as a hot (small) object, then it will be served by SSD. Otherwise, cold (large) objects are stored on SMR. For an SMR, large objects can be accessed in large sequential blocks, rendering the performance of their accesses comparable with that of accessing the same large sequential blocks as if they were stored on a hard drive. Small objects, such as inodes and directories, are stored on the SSD where ``seeks'' for such objects are nearly free. With thorough empirical studies, we demonstrate that CosaFS, as a cooperative shingle-aware file system, with metadata separation and cache-assistance, is a very effective way to handle the disk-based data demanded by the shingled writes and outperforms the device- and host-side shingle-aware file systems in terms of throughput, IOPS, and access latency as well.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "34", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Einziger:2017:THE, author = "Gil Einziger and Roy Friedman and Ben Manes", title = "{TinyLFU}: a Highly Efficient Cache Admission Policy", journal = j-TOS, volume = "13", number = "4", pages = "35:1--35:??", month = dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3149371", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Dec 22 18:16:19 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "This article proposes to use a frequency-based cache admission policy in order to boost the effectiveness of caches subject to skewed access distributions. Given a newly accessed item and an eviction candidate from the cache, our scheme decides, based on the recent access history, whether it is worth admitting the new item into the cache at the expense of the eviction candidate. This concept is enabled through a novel approximate LFU structure called TinyLFU, which maintains an approximate representation of the access frequency of a large sample of recently accessed items. TinyLFU is very compact and lightweight as it builds upon Bloom filter theory. We study the properties of TinyLFU through simulations of both synthetic workloads and multiple real traces from several sources. These simulations demonstrate the performance boost obtained by enhancing various replacement policies with the TinyLFU admission policy. Also, a new combined replacement and eviction policy scheme nicknamed W-TinyLFU is presented. W-TinyLFU is demonstrated to obtain equal or better hit ratios than other state-of-the-art replacement policies on these traces. It is the only scheme to obtain such good results on all traces.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "35", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hatzieleftheriou:2017:CSJ, author = "Andromachi Hatzieleftheriou and Stergios V. Anastasiadis", title = "Client-Side Journaling for Durable Shared Storage", journal = j-TOS, volume = "13", number = "4", pages = "36:1--36:??", month = dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3149372", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Dec 22 18:16:19 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Hardware consolidation in the datacenter often leads to scalability bottlenecks from heavy utilization of critical resources, such as the storage and network bandwidth. Client-side caching on durable media is already applied at block level to reduce the storage backend load but has received criticism for added overhead, restricted sharing, and possible data loss at client crash. We introduce a journal to the kernel-level client of an object-based distributed filesystem to improve durability at high I/O performance and reduced shared resource utilization. Storage virtualization at the file interface achieves clear consistency semantics across data and metadata, supports native file sharing among clients, and provides flexible configuration of durable data staging at the host. Over a prototype that we have implemented, we experimentally quantify the performance and efficiency of the proposed Arion system in comparison to a production system. We run microbenchmarks and application-level workloads over a local cluster and a public cloud. We demonstrate reduced latency by 60\% and improved performance up to 150\% at reduced server network and disk bandwidth by 41\% and 77\%, respectively. 
The performance improvement reaches 92\% for 16 relational databases as clients and gets as high as 11.3$ \times $ with two key-value stores as clients.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "36", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Kim:2017:GED, author = "Sang-Hoon Kim and Jinhyuk Lee and Jin-Soo Kim", title = "{GCMix}: an Efficient Data Protection Scheme against the Paired Page Interference", journal = j-TOS, volume = "13", number = "4", pages = "37:1--37:??", month = dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3149373", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Dec 22 18:16:19 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "In multi-level cell (MLC) NAND flash memory, two logical pages are overlapped on a single physical page. Even after a logical page is programmed, the data can be corrupted if the programming of the coexisting logical page is interrupted. This phenomenon is called paired page interference. This article proposes a novel software technique to deal with the paired page interference without any additional hardware or extra page write. The proposed technique utilizes valid pages in the victim block during garbage collection (GC) as the backup against the interference, and pairs them with incoming pages written by the host. This approach eliminates undesirable page copy to backup pages against the interference. However, such a strategy has an adverse effect on the hot/cold separation policy, which is essential to improve the efficiency of GC. To limit the downside, we devise a metric to estimate the benefit of GCMix on-the-fly so that GCMix can be adaptively utilized only when the benefit outweighs the overhead.
Evaluations using synthetic and real workloads show GCMix can effectively deal with the paired page interference, reducing the write amplification factor by up to 17.5\% compared to the traditional technique, while providing comparable I/O performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "37", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Shafaei:2017:MDM, author = "Mansour Shafaei and Mohammad Hossein Hajkazemi and Peter Desnoyers and Abutalib Aghayev", title = "Modeling Drive-Managed {SMR} Performance", journal = j-TOS, volume = "13", number = "4", pages = "38:1--38:??", month = dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3139242", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Dec 22 18:16:19 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Accurately modeling drive-managed Shingled Magnetic Recording (SMR) disks is a challenge, requiring an array of approaches including both existing disk modeling techniques as well as new techniques for inferring internal translation layer algorithms. In this work, we present the first predictive simulation model of a generally available drive-managed SMR disk. Despite the use of unknown proprietary algorithms in this device, our model that is derived from external measurements is able to predict mean latency within a few percent, and with a Root Mean Square (RMS) cumulative latency error of 25\% or less for most workloads tested. These variations, although not small, are in most cases less than three times the drive-to-drive variation seen among seemingly identical drives.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Storage", articleno = "38", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Haghdoost:2017:HSR, author = "Alireza Haghdoost and Weiping He and Jerry Fredin and David H. C. Du", title = "{\tt hfplayer}: Scalable Replay for Intensive Block {I/O} Workloads", journal = j-TOS, volume = "13", number = "4", pages = "39:1--39:??", month = dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3149392", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Dec 22 18:16:19 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "We introduce new methods to replay intensive block I/O workloads more accurately. These methods can be used to reproduce realistic workloads for benchmarking, performance validation, and tuning of a high-performance block storage device/system. In this article, we study several sources in the stock operating system that introduce uncertainty in the workload replay. Based on the remedies of these findings, we design and develop a new replay tool called hfplayer that replays intensive block I/O workloads in a similar unscaled environment with more accuracy. To replay a given workload trace in a scaled environment with faster storage or host server, the dependency between I/O requests becomes crucial since the timing and ordering of I/O requests is expected to change according to these dependencies. Therefore, we propose a heuristic way of speculating I/O dependencies in a block I/O trace. Using the generated dependency graph, hfplayer tries to propagate I/O related performance gains appropriately along the I/O dependency chains and mimics the original application behavior when it executes in a scaled environment with slower or faster storage system and servers. 
We evaluate hfplayer with a wide range of workloads using several accuracy metrics and find that it produces better accuracy when compared to other replay approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "39", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hou:2017:GLL, author = "Binbing Hou and Feng Chen", title = "{GDS--LC}: a Latency- and Cost-Aware Client Caching Scheme for Cloud Storage", journal = j-TOS, volume = "13", number = "4", pages = "40:1--40:??", month = dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3149374", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Dec 22 18:16:19 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Successfully integrating cloud storage as a primary storage layer in the I/O stack is highly challenging. This is essentially due to two inherent critical issues: the high and variant cloud I/O latency and the per-I/O pricing model of cloud storage. To minimize the associated latency and monetary cost with cloud I/Os, caching is a crucial technology, as it directly influences how frequently the client has to communicate with the cloud. Unfortunately, current cloud caching schemes are mostly designed to optimize miss reduction as the sole objective and only focus on improving system performance while ignoring the fact that various cache misses could have completely distinct effects in terms of latency and monetary cost. In this article, we present a cost-aware caching scheme, called GDS-LC, which is highly optimized for cloud storage caching. 
Different from traditional caching schemes that merely focus on improving cache hit ratios and the classic cost-aware schemes that can only achieve a single optimization target, GDS-LC offers a comprehensive cache design by considering not only the access locality but also the object size, associated latency, and price, aiming at enhancing the user experience with cloud storage from two aspects: access latency and monetary cost. To achieve this, GDS-LC virtually partitions the cache space into two regions: a high-priority latency-aware region and a low-priority price-aware region. Each region is managed by a cost-aware caching scheme, which is based on GreedyDual-Size (GDS) and designed for a cloud storage scenario by adopting clean-dirty differentiation and latency normalization. The GDS-LC framework is highly flexible, and we present a further enhanced algorithm, called GDS-LCF, by incorporating access frequency in caching decisions. We have built a prototype to emulate a typical cloud client cache and evaluate GDS-LC and GDS-LCF with Amazon Simple Storage Services (S3) in three different scenarios: local cloud, Internet cloud, and heterogeneous cloud. Our experimental results show that our caching schemes can effectively achieve both optimization goals: low access latency and low monetary cost. It is our hope that this work can inspire the community to reconsider the cache design in the cloud environment, especially for the purpose of integrating cloud storage into the current storage stack as a primary layer.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "40", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Noh:2018:ECL, author = "Sam H. 
Noh", title = "{Editor-in-Chief} Letter", journal = j-TOS, volume = "14", number = "1", pages = "1:1--1:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3180478", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:48 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Xue:2018:ISI, author = "Chun Jason Xue and Michael Swift", title = "Introduction to the Special Issue on {NVM} and Storage", journal = j-TOS, volume = "14", number = "1", pages = "2:1--2:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3180480", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:48 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Chen:2018:UUS, author = "Shuo-Han Chen and Tseng-Yi Chen and Yuan-Hao Chang and Hsin-Wen Wei and Wei-Kuan Shih", title = "{UnistorFS}: a Union Storage File System Design for Resource Sharing between Memory and Storage on Persistent {RAM}-Based Systems", journal = j-TOS, volume = "14", number = "1", pages = "3:1--3:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3177918", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:48 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "With the advanced technology in persistent random access memory (PRAM), PRAM such as three-dimensional XPoint memory and Phase Change Memory (PCM) is emerging as a promising candidate for the next-generation medium for both (main) memory and storage. Previous works mainly focus on how to overcome the possible endurance issues of PRAM while both main memory and storage own a partition on the same PRAM device. However, a holistic software-level system design should be proposed to fully exploit the benefit of PRAM. This article proposes a union storage file system (UnistorFS), which aims to jointly manage the PRAM resource for main memory and storage. The proposed UnistorFS realizes the concept of using the PRAM resource as memory and storage interchangeably to achieve resource sharing while main memory and storage coexist on the same PRAM device with no partition or logical boundary. This approach not only enables PRAM resource sharing but also eliminates unnecessary data movements between main memory and storage since they are already in the same address space and can be accessed directly. At the same time, the proposed UnistorFS ensures the persistence of file data and sanity of the file system after power recycling. 
A series of experiments was conducted on a modified Linux kernel. The results show that the proposed UnistorFS can eliminate unnecessary memory accesses and outperform other PRAM-based file systems for 0.2--8.7 times in terms of read/write performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Chen:2018:HPM, author = "Youmin Chen and Jiwu Shu and Jiaxin Ou and Youyou Lu", title = "{HiNFS}: a Persistent Memory File System with Both Buffering and Direct-Access", journal = j-TOS, volume = "14", number = "1", pages = "4:1--4:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3204454", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:48 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Persistent memory provides data persistence at main memory with emerging non-volatile main memories (NVMMs). Recent persistent memory file systems aggressively use direct access, which directly copy data between user buffer and the storage layer, to avoid the double-copy overheads through the OS page cache. However, we observe they all suffer from slow writes due to NVMMs' asymmetric read-write performance and much slower performance than DRAM. In this article, we propose HiNFS, a high-performance file system for non-volatile main memory, to combine both buffering and direct access for fine-grained file system operations. HiNFS uses an NVMM-aware Write Buffer to buffer the lazy-persistent file writes in DRAM, while performing direct access to NVMM for eager-persistent file writes. It directly reads file data from both DRAM and NVMM, by ensuring read consistency with a combination of the DRAM Block Index and Cacheline Bitmap to track the latest data between DRAM and NVMM. 
HiNFS also employs a Buffer Benefit Model to identify the eager-persistent file writes before issuing I/Os. Evaluations show that HiNFS significantly improves throughput by up to 184\% and reduces execution time by up to 64\% compared with state-of-the-art persistent memory file systems PMFS and EXT4-DAX.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Kim:2018:CTC, author = "Wook-Hee Kim and Jihye Seo and Jinwoong Kim and Beomseok Nam", title = "{clfB-tree}: Cacheline Friendly Persistent {B}-tree for {NVRAM}", journal = j-TOS, volume = "14", number = "1", pages = "5:1--5:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3129263", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:48 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Emerging byte-addressable non-volatile memory (NVRAM) is expected to replace block device storages as an alternative low-latency persistent storage device. If NVRAM is used as a persistent storage device, a cache line instead of a disk page will be the unit of data transfer, consistency, and durability. In this work, we design and develop clfB-tree---a B-tree structure whose tree node fits in a single cache line. We employ existing write combining store buffer and restricted transactional memory to provide a failure-atomic cache line write operation. Using the failure-atomic cache line write operations, we atomically update a clfB-tree node via a single cache line flush instruction without major changes in hardware. However, there exist many processors that do not provide SW interface for transactional memory. For those processors, our proposed clfB-tree achieves atomicity and consistency via in-place update, which requires maximum four cache line flushes.
We evaluate the performance of clfB-tree on an NVRAM emulation board with ARM Cortex A-9 processor and a workstation that has Intel Xeon E7-4809 v3 processor. Our experimental results show clfB-tree outperforms wB-tree and CDDS B-tree by a large margin in terms of both insertion and search performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wang:2018:PRT, author = "Chundong Wang and Qingsong Wei and Lingkun Wu and Sibo Wang and Cheng Chen and Xiaokui Xiao and Jun Yang and Mingdi Xue and Yechao Yang", title = "Persisting {RB-Tree} into {NVM} in a Consistency Perspective", journal = j-TOS, volume = "14", number = "1", pages = "6:1--6:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3177915", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:48 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Byte-addressable non-volatile memory (NVM) is going to reshape conventional computer systems. With advantages of low latency, byte-addressability, and non-volatility, NVM can be directly put on the memory bus to replace DRAM. As a result, both system and application software have to be adjusted to perceive the fact that the persistent layer moves up to the memory. However, most of the current in-memory data structures will be problematic with consistency issues if not well tuned with NVM. This article places emphasis on an important in-memory structure that is widely used in computer systems, i.e., the Red/Black-tree (RB-tree). Since it has a long and complicated update process, the RB-tree is prone to inconsistency problems with NVM. This article presents an NVM-compatible consistent RB-tree with a new technique named cascade-versioning. 
The proposed RB-tree (i) is all-time consistent and scalable and (ii) needs no recovery procedure after system crashes. Experiment results show that the RB-tree for NVM not only achieves the aim of consistency with insignificant spatial overhead but also yields comparable performance to an ordinary volatile RB-tree.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Liu:2018:DDT, author = "Mengxing Liu and Mingxing Zhang and Kang Chen and Xuehai Qian and Yongwei Wu and Weimin Zheng and Jinglei Ren", title = "{DudeTx}: Durable Transactions Made Decoupled", journal = j-TOS, volume = "14", number = "1", pages = "7:1--7:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3177920", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:48 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Emerging non-volatile memory (NVM) offers non-volatility, byte-addressability, and fast access at the same time. It is suggested that programs should access NVM directly through CPU load and store instructions. To guarantee crash consistency, durable transactions are regarded as a common choice of applications for accessing persistent memory data. However, existing durable transaction systems employ either undo logging, which requires a fence for every memory write, or redo logging, which requires intercepting all memory reads within transactions. Both approaches incur significant overhead. This article presents DudeTx, a crash-consistent durable transaction system that avoids the drawbacks of both undo and redo logging. DudeTx uses shadow DRAM to decouple the execution of a durable transaction into three fully asynchronous steps. The advantage is that only minimal fences and no memory read instrumentation are required. 
This design enables an out-of-the-box concurrency control mechanism, transactional memory or fine-grained locks, to be used as an independent component. The evaluation results show that DudeTx adds durability to a software transactional memory system with only 7.4\%--24.6\% throughput degradation. Compared to typical existing durable transaction systems, DudeTx provides 1.7$ \times $ --4.4$ \times $ higher throughput. Moreover, DudeTx can be implemented with hardware transactional memory or lock-based concurrency control, leading to a further 1.7$ \times $ and 3.3$ \times $ speedup, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Sun:2018:BDS, author = "Yuliang Sun and Yu Wang and Huazhong Yang", title = "Bidirectional Database Storage and {SQL} Query Exploiting {RRAM}-Based Process-in-Memory Structure", journal = j-TOS, volume = "14", number = "1", pages = "8:1--8:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3177917", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:48 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "With the coming of the ``Big Data'' era, a high-energy-efficiency database is demanded for the Internet of things (IoT) application scenarios. The emerging Resistive Random Access Memory (RRAM) has been considered as an energy-efficient replacement of DRAM for next-generation main memory. In this article, we propose an RRAM-based SQL query unit with process-in-memory (PIM) characteristics. A bidirectional storage structure for a database in RRAM crossbar array is proposed that avoids redundant data transfer to cache and reduces cache miss rate compared with the storage method in DRAM for an in-memory database. 
The proposed RRAM-based SQL query unit can support a representative subset of SQL queries in memory and thus can further reduce the data transfer cost. The corresponding query optimization method is proposed to fully utilize the PIM characteristics. Simulation results show that the energy efficiency of the proposed RRAM-based SQL query unit is increased by 4 to 6 orders of magnitudes compared with the traditional architecture.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Han:2018:NRB, author = "Lei Han and Zhaoyan Shen and Duo Liu and Zili Shao and H. Howie Huang and Tao Li", title = "A Novel {ReRAM}-Based Processing-in-Memory Architecture for Graph Traversal", journal = j-TOS, volume = "14", number = "1", pages = "9:1--9:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3177916", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:48 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Graph algorithms such as graph traversal have been gaining ever-increasing importance in the era of big data. However, graph processing on traditional architectures issues many random and irregular memory accesses, leading to a huge number of data movements and the consumption of very large amounts of energy. To minimize the waste of memory bandwidth, we investigate utilizing processing-in-memory (PIM), combined with non-volatile metal-oxide resistive random access memory (ReRAM), to improve both computation and I/O performance. We propose a new ReRAM-based processing-in-memory architecture called RPBFS, in which graph data can be persistently stored and processed in place. We study the problem of graph traversal, and we design an efficient graph traversal algorithm in RPBFS. 
Benefiting from low data movement overhead and high bank-level parallel computation, RPBFS shows a significant performance improvement compared with both the CPU-based and the GPU-based BFS implementations. On a suite of real-world graphs, our architecture yields a speedup in graph traversal performance of up to 33.8$ \times $, and achieves a reduction in energy over conventional systems of up to 142.8$ \times $.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Yadgar:2018:AFP, author = "Gala Yadgar and Eitan Yaakobi and Fabio Margaglia and Yue Li and Alexander Yucovich and Nachum Bundak and Lior Gilon and Nir Yakovi and Assaf Schuster and Andr{\'e} Brinkmann", title = "An Analysis of Flash Page Reuse With {WOM} Codes", journal = j-TOS, volume = "14", number = "1", pages = "10:1--10:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3177886", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:48 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Flash memory is prevalent in modern servers and devices. Coupled with the scaling down of flash technology, the popularity of flash memory motivates the search for methods to increase flash reliability and lifetime. Erasures are the dominant cause of flash cell wear, but reducing them is challenging because flash is a write-once medium---memory cells must be erased prior to writing. An approach that has recently received considerable attention relies on write-once memory (WOM) codes, designed to accommodate additional writes on write-once media. However, the techniques proposed for reusing flash pages with WOM codes are limited in their scope.
Many focus on the coding theory alone, whereas others suggest FTL designs that are application specific, or not applicable due to their complexity, overheads, or specific constraints of multilevel cell (MLC) flash. This work is the first that addresses all aspects of page reuse within an end-to-end analysis of a general-purpose FTL on MLC flash. We use a hardware evaluation setup to directly measure the short- and long-term effects of page reuse on SSD durability and energy consumption, and show that FTL design must explicitly take them into account. We then provide a detailed analytical model for deriving the optimal garbage collection policy for such FTL designs, and for predicting the benefit from reuse on realistic hardware and workload characteristics.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Ho:2018:SLP, author = "Chien-Chung Ho and Yu-Ming Chang and Yuan-Hao Chang and Tei-Wei Kuo", title = "An {SLC}-Like Programming Scheme for {MLC} Flash Memory", journal = j-TOS, volume = "14", number = "1", pages = "11:1--11:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3129257", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:48 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Although the multilevel cell (MLC) technique is widely adopted by flash-memory vendors to boost the chip density and lower the cost, it results in serious performance and reliability problems. Different from past work, a new cell programming method is proposed to not only significantly improve chip performance but also reduce the potential bit error rate. 
In particular, a single-level cell (SLC)-like programming scheme is proposed to better explore the threshold-voltage relationship to denote different MLC bit information, which in turn drastically provides a larger window of threshold voltage similar to that found in SLC chips. It could result in less programming iterations and simultaneously a much less reliability problem in programming flash-memory cells. In the experiments, the new programming scheme could accelerate the programming speed up to 742\% and even reduce the bit error rate up to 471\% for MLC pages.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hu:2018:FMR, author = "Xiameng Hu and Xiaolin Wang and Lan Zhou and Yingwei Luo and Zhenlin Wang and Chen Ding and Chencheng Ye", title = "Fast Miss Ratio Curve Modeling for Storage Cache", journal = j-TOS, volume = "14", number = "2", pages = "12:1--12:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3185751", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The reuse distance (least recently used (LRU) stack distance) is an essential metric for performance prediction and optimization of storage cache. Over the past four decades, there have been steady improvements in the algorithmic efficiency of reuse distance measurement. This progress is accelerating in recent years, both in theory and practical implementation. In this article, we present a kinetic model of LRU cache memory, based on the average eviction time (AET) of the cached data. The AET model enables fast measurement and use of low-cost sampling. It can produce the miss ratio curve in linear time with extremely low space costs. 
On storage trace benchmarks, AET reduces the time and space costs compared to former techniques. Furthermore, AET is a composable model that can characterize shared cache behavior through sampling and modeling individual programs or traces.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Sun:2018:CSN, author = "Zhen ``Jason'' Sun and Geoff Kuenning and Sonam Mandal and Philip Shilane and Vasily Tarasov and Nong Xiao and Erez Zadok", title = "Cluster and Single-Node Analysis of Long-Term Deduplication Patterns", journal = j-TOS, volume = "14", number = "2", pages = "13:1--13:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3183890", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Deduplication has become essential in disk-based backup systems, but there have been few long-term studies of backup workloads. Most past studies either were of a small static snapshot or covered only a short period that was not representative of how a backup system evolves over time. For this article, we first collected 21 months of data from a shared user file system; 33 users and over 4,000 snapshots are covered. We then analyzed the dataset, examining a variety of essential characteristics across two dimensions: single-node deduplication and cluster deduplication. For single-node deduplication analysis, our primary focus was individual-user data. Despite apparently similar roles and behavior among all of our users, we found significant differences in their deduplication ratios. Moreover, the data that some users share with others had a much higher deduplication ratio than average.
For cluster deduplication analysis, we implemented seven published data-routing algorithms and created a detailed comparison of their performance with respect to deduplication ratio, load distribution, and communication overhead. We found that per-file routing achieves a higher deduplication ratio than routing by super-chunk (multiple consecutive chunks), but it also leads to high data skew (imbalance of space usage across nodes). We also found that large chunking sizes are better for cluster deduplication, as they significantly reduce data-routing overhead, while their negative impact on deduplication ratios is small and acceptable. We draw interesting conclusions from both single-node and cluster deduplication analysis and make recommendations for future deduplication systems design.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zhou:2018:EEE, author = "Deng Zhou and Vania Fang and Tao Xie and Wen Pan and Ram Kesavan and Tony Lin and Naresh Patel", title = "Empirical Evaluation and Enhancement of Enterprise Storage System Request Scheduling", journal = j-TOS, volume = "14", number = "2", pages = "14:1--14:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3193741", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Since little has been reported in the literature concerning enterprise storage system file-level request scheduling, we do not have enough knowledge about how various scheduling factors affect performance. Moreover, we are in lack of a good understanding on how to enhance request scheduling to adapt to the changing characteristics of workloads and hardware resources. 
To answer these questions, we first build a request scheduler prototype based on WAFL\reg, a mainstream file system running on numerous enterprise storage systems worldwide. Next, we use the prototype to quantitatively measure the impact of various scheduling configurations on performance on a NetApp\reg's enterprise-class storage system. Several observations have been made. For example, we discover that in order to improve performance, the priority of write requests and non-preempted restarted requests should be boosted in some workloads. Inspired by these observations, we further propose two scheduling enhancement heuristics called SORD (size-oriented request dispatching) and QATS (queue-depth aware time slicing). Finally, we evaluate them by conducting a wide range of experiments using workloads generated by SPC-1 and SFS2014 on both HDD-based and all-flash platforms. Experimental results show that the combination of the two can noticeably reduce average request latency under some workloads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Teng:2018:LCD, author = "Dejun Teng and Lei Guo and Rubao Lee and Feng Chen and Yanfeng Zhang and Siyuan Ma and Xiaodong Zhang", title = "A Low-cost Disk Solution Enabling {LSM}-tree to Achieve High Performance for Mixed Read\slash Write Workloads", journal = j-TOS, volume = "14", number = "2", pages = "15:1--15:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3162615", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "LSM-tree has been widely used in data management production systems for write-intensive workloads. 
However, as read and write workloads co-exist under LSM-tree, data accesses can experience long latency and low throughput due to the interferences to buffer caching from the compaction, a major and frequent operation in LSM-tree. After a compaction, the existing data blocks are reorganized and written to other locations on disks. As a result, the related data blocks that have been loaded in the buffer cache are invalidated since their referencing addresses are changed, causing serious performance degradations. To re-enable high-speed buffer caching during intensive writes, we propose Log-Structured buffered-Merge tree (simplified as LSbM-tree) by adding a compaction buffer on disks to minimize the cache invalidations on buffer cache caused by compactions. The compaction buffer efficiently and adaptively maintains the frequently visited datasets. In LSbM, strong locality objects can be effectively kept in the buffer cache with minimum or no harmful invalidations. With the help of a small on-disk compaction buffer, LSbM achieves a high query performance by enabling effective buffer caching, while retaining all the merits of LSM-tree for write-intensive data processing and providing high bandwidth of disks for range queries. We have implemented LSbM based on LevelDB. We show that with a standard buffer cache and a hard disk, LSbM can achieve 2x performance improvement over LevelDB. We have also compared LSbM with other existing solutions to show its strong cache effectiveness.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Xiong:2018:CFG, author = "Qin Xiong and Fei Wu and Zhonghai Lu and Yue Zhu and You Zhou and Yibing Chu and Changsheng Xie and Ping Huang", title = "Characterizing {$3$D} Floating Gate {NAND} Flash: Observations, Analyses, and Implications", journal = j-TOS, volume = "14", number = "2", pages = "16:1--16:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3162616", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "As both NAND flash memory manufacturers and users are turning their attentions from planar architecture towards three-dimensional (3D) architecture, it becomes critical and urgent to understand the characteristics of 3D NAND flash memory. These characteristics, especially those different from planar NAND flash, can significantly affect design choices of flash management techniques. In this article, we present a characterization study on the state-of-the-art 3D floating gate (FG) NAND flash memory through comprehensive experiments on an FPGA-based 3D NAND flash evaluation platform. We make distinct observations on its performance and reliability, such as operation latencies and various error patterns, followed by careful analyses from physical and circuit-level perspectives. Although 3D FG NAND flash provides much higher storage densities than planar NAND flash, it faces new performance challenges of garbage collection overhead and program performance variations and more complicated reliability issues due to, e.g., distinct location dependence and value dependence of errors. 
We also summarize the differences between 3D FG NAND flash and planar NAND flash and discuss implications on the designs of NAND flash management techniques brought by the architecture innovation. We believe that our work will facilitate developing novel 3D FG NAND flash-oriented designs to achieve better performance and reliability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "16", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Yoo:2018:OOF, author = "Jinsoo Yoo and Joontaek Oh and Seongjin Lee and Youjip Won and Jin-Yong Ha and Jongsung Lee and Junseok Shim", title = "{OrcFS}: Orchestrated File System for Flash Storage", journal = j-TOS, volume = "14", number = "2", pages = "17:1--17:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3162614", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "In this work, we develop the Orchestrated File System (OrcFS) for Flash storage. OrcFS vertically integrates the log-structured file system and the Flash-based storage device to eliminate the redundancies across the layers. A few modern file systems adopt sophisticated append-only data structures in an effort to optimize the behavior of the file system with respect to the append-only nature of the Flash memory. While the benefit of adopting an append-only data structure seems fairly promising, it makes the stack of software layers full of unnecessary redundancies, leaving substantial room for improvement. 
The redundancies include (i) redundant levels of indirection (address translation), (ii) duplicate efforts to reclaim the invalid blocks (i.e., segment cleaning in the file system and garbage collection in the storage device), and (iii) excessive over-provisioning (i.e., separate over-provisioning areas in each layer). OrcFS eliminates these redundancies via distributing the address translation, segment cleaning (or garbage collection), bad block management, and wear-leveling across the layers. Existing solutions suffer from high segment cleaning overhead and cause significant write amplification due to mismatch between the file system block size and the Flash page size. To optimize the I/O stack while avoiding these problems, OrcFS adopts three key technical elements. First, OrcFS uses disaggregate mapping, whereby it partitions the Flash storage into two areas, managed by a file system and Flash storage, respectively, with different granularity. In OrcFS, the metadata area and data area are maintained by 4Kbyte page granularity and 256Mbyte superblock granularity. The superblock-based storage management aligns the file system section size, which is a unit of segment cleaning, with the superblock size of the underlying Flash storage. It can fully exploit the internal parallelism of the underlying Flash storage, exploiting the sequential workload characteristics of the log-structured file system. Second, OrcFS adopts quasi-preemptive segment cleaning to prohibit the foreground I/O operation from being interfered with by segment cleaning. The latency to reclaim the free space can be prohibitive in OrcFS due to its large file system section size, 256Mbyte. OrcFS effectively addresses this issue via adopting a polling-based segment cleaning scheme. Third, the OrcFS introduces block patching to avoid unnecessary write amplification in the partial page program. OrcFS is the enhancement of the F2FS file system. 
We develop a prototype OrcFS based on F2FS and server class SSD with modified firmware (Samsung 843TN). OrcFS reduces the device mapping table requirement to 1/465 and 1/4 compared with the page mapping and the smallest mapping scheme known to the public, respectively. Via eliminating the redundancy in the segment cleaning and garbage collection, the OrcFS reduces 1/3 of the write volume under heavy random write workload. OrcFS achieves 56\% performance gain against EXT4 in varmail workload.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "17", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Vef:2018:CST, author = "Marc-Andr{\'e} Vef and Vasily Tarasov and Dean Hildebrand and Andr{\'e} Brinkmann", title = "Challenges and Solutions for Tracing Storage Systems: a Case Study with Spectrum Scale", journal = j-TOS, volume = "14", number = "2", pages = "18:1--18:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3149376", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "IBM Spectrum Scale's parallel file system General Parallel File System (GPFS) has a 20-year development history with over 100 contributing developers. Its ability to support strict POSIX semantics across more than 10K clients leads to a complex design with intricate interactions between the cluster nodes. Tracing has proven to be a vital tool to understand the behavior and the anomalies of such a complex software product. However, the necessary trace information is often buried in hundreds of gigabytes of by-product trace records. Further, the overhead of tracing can significantly impact running applications and file system performance, limiting the use of tracing in a production system. 
In this research article, we discuss the evolution of the mature and highly scalable GPFS tracing tool and present the exploratory study of GPFS' new tracing interface, FlexTrace, which allows developers and users to accurately specify what to trace for the problem they are trying to solve. We evaluate our methodology and prototype, demonstrating that the proposed approach has negligible overhead, even under intensive I/O workloads and with low-latency storage devices.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "18", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Kashyap:2018:WCE, author = "Anil Kashyap", title = "Workload Characterization for Enterprise Disk Drives", journal = j-TOS, volume = "14", number = "2", pages = "19:1--19:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3151847", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The article presents an analysis of drive workloads from enterprise storage systems. The drive workloads are obtained from field return units from a cross-section of enterprise storage system vendors and thus provides a view of the workload characteristics over a wide spectrum of end-user applications. The workload parameters that have been characterized include transfer lengths, access patterns, throughput, and utilization. The study shows that reads are the dominant workload accounting for 80\% of the accesses to the drive. Writes are dominated by short block random accesses while reads range from random to highly sequential. A trend analysis over the period 2010-2014 shows that the workload has remained fairly constant even as the capacities of the drives shipped has steadily increased. 
The study shows that the data stored on disk drives is relatively cold---on average less than 4\% of the drive capacity is accessed in a given 2h interval.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "19", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Agrawal:2018:ISI, author = "Nitin Agrawal and Raju Rangaswami", title = "Introduction to the Special Issue on {USENIX FAST 2018}", journal = j-TOS, volume = "14", number = "3", pages = "20:1--20:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3242152", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "20", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Alagappan:2018:PAR, author = "Ramnatthan Alagappan and Aishwarya Ganesan and Eric Lee and Aws Albarghouthi and Vijay Chidambaram and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau", title = "Protocol-Aware Recovery for Consensus-Based Distributed Storage", journal = j-TOS, volume = "14", number = "3", pages = "21:1--21:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3241062", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "We introduce protocol-aware recovery (Par), a new approach that exploits protocol-specific knowledge to correctly recover from storage faults in distributed systems. We demonstrate the efficacy of Par through the design and implementation of corruption-tolerant replication (Ctrl), a Par mechanism specific to replicated state machine (RSM) systems. 
We experimentally show that the Ctrl versions of two systems, LogCabin and ZooKeeper, safely recover from storage faults and provide high availability, while the unmodified versions can lose data or become unavailable. We also show that the Ctrl versions achieve this reliability with little performance overheads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "21", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zhan:2018:EDM, author = "Yang Zhan and Yizheng Jiao and Donald E. Porter and Alex Conway and Eric Knorr and Martin Farach-Colton and Michael A. Bender and Jun Yuan and William Jannen and Rob Johnson", title = "Efficient Directory Mutations in a Full-Path-Indexed File System", journal = j-TOS, volume = "14", number = "3", pages = "22:1--22:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3241061", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Full-path indexing can improve I/O efficiency for workloads that operate on data organized using traditional, hierarchical directories, because data is placed on persistent storage in scan order. Prior results indicate, however, that renames in a local file system with full-path indexing are prohibitively expensive. This article shows how to use full-path indexing in a file system to realize fast directory scans, writes, and renames. The article introduces a range-rename mechanism for efficient key-space changes in a write-optimized dictionary. This mechanism is encapsulated in the key-value Application Programming Interface (API) and simplifies the overall file system design. We implemented this mechanism in B$^{\epsilon}$-trees File System (BetrFS), an in-kernel, local file system for Linux. 
This new version, BetrFS 0.4, performs recursive greps 1.5x faster and random writes 1.2x faster than BetrFS 0.3, but renames are competitive with indirection-based file systems for a range of sizes. BetrFS 0.4 outperforms BetrFS 0.3, as well as traditional file systems, such as ext4, Extents File System (XFS), and Z File System (ZFS), across a variety of workloads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "22", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Gunawi:2018:FSS, author = "Haryadi S. Gunawi and Riza O. Suminto and Russell Sears and Casey Golliher and Swaminathan Sundararaman and Xing Lin and Tim Emami and Weiguang Sheng and Nematollah Bidokhti and Caitie McCaffrey and Deepthi Srinivasan and Biswaranjan Panda and Andrew Baptist and Gary Grider and Parks M. Fields and Kevin Harms and Robert B. Ross and Andree Jacobson and Robert Ricci and Kirk Webb and Peter Alvaro and H. Birali Runesha and Mingzhe Hao and Huaicheng Li", title = "Fail-Slow at Scale: Evidence of Hardware Performance Faults in Large Production Systems", journal = j-TOS, volume = "14", number = "3", pages = "23:1--23:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3242086", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Fail-slow hardware is an under-studied failure mode. We present a study of 114 reports of fail-slow hardware incidents, collected from large-scale cluster deployments in 14 institutions. We show that all hardware types such as disk, SSD, CPU, memory, and network components can exhibit performance faults. We made several important observations such as faults convert from one form to another, the cascading root causes and impacts can be long, and fail-slow faults can have varying symptoms. 
From this study, we make suggestions to vendors, operators, and systems designers.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "23", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Won:2018:BOC, author = "Youjip Won and Joontaek Oh and Jaemin Jung and Gyeongyeol Choi and Seongbae Son and Jooyoung Hwang and Sangyeun Cho", title = "Bringing Order to Chaos: Barrier-Enabled {I/O} Stack for Flash Storage", journal = j-TOS, volume = "14", number = "3", pages = "24:1--24:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3242091", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "This work is dedicated to eliminating the overhead required for guaranteeing the storage order in the modern IO stack. The existing block device adopts a prohibitively expensive approach in ensuring the storage order among write requests: interleaving the write requests with Transfer-and-Flush. For exploiting the cache barrier command for flash storage, we overhaul the IO scheduler, the dispatch module, and the filesystem so that these layers are orchestrated to preserve the ordering condition imposed by the application with which the associated data blocks are made durable. The key ingredients of Barrier-Enabled IO stack are Epoch-based IO scheduling, Order-Preserving Dispatch, and Dual-Mode Journaling. Barrier-enabled IO stack can control the storage order without Transfer-and-Flush overhead. We implement the barrier-enabled IO stack in server as well as in mobile platforms. SQLite performance increases by 270\% and 75\%, in server and in smartphone, respectively. 
In a server storage, BarrierFS brings as much as by 43 $ \times $ and by 73$ \times $ performance gain in MySQL and SQLite, respectively, against EXT4 via relaxing the durability of a transaction.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "24", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Lee:2018:MCM, author = "Minho Lee and Dong Hyun Kang and Young Ik Eom", title = "{M-CLOCK}: Migration-optimized Page Replacement Algorithm for Hybrid Memory Architecture", journal = j-TOS, volume = "14", number = "3", pages = "25:1--25:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3216730", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Phase Change Memory (PCM) has drawn great attention as a main memory due to its attractive characteristics such as non-volatility, byte-addressability, and in-place update. However, since the capacity of PCM is not fully mature yet, hybrid memory architecture that consists of DRAM and PCM has been suggested as a main memory. In addition, page replacement algorithm based on hybrid memory architecture is actively being studied, because existing page replacement algorithms cannot be used on hybrid memory architecture in that they do not consider the two weaknesses of PCM: high write latency and low endurance. In this article, to mitigate the above hardware limitations of PCM, we revisit the page cache layer for the hybrid memory architecture and propose a novel page replacement algorithm, called M-CLOCK, to improve the performance of hybrid memory architecture and the lifespan of PCM. In particular, M-CLOCK aims to reduce the number of PCM writes that negatively affect the performance of hybrid memory architecture. 
Experimental results clearly show that M-CLOCK outperforms the state-of-the-art page replacement algorithms in terms of the number of PCM writes and effective memory access time by up to 98\% and 9.4 times, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "25", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Shen:2018:DID, author = "Zhaoyan Shen and Feng Chen and Yichen Jia and Zili Shao", title = "{DIDACache}: an Integration of Device and Application for Flash-based Key-value Caching", journal = j-TOS, volume = "14", number = "3", pages = "26:1--26:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3203410", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Key-value caching is crucial to today's low-latency Internet services. Conventional key-value cache systems, such as Memcached, heavily rely on expensive DRAM memory. To lower Total Cost of Ownership, the industry recently is moving toward more cost-efficient flash-based solutions, such as Facebook's McDipper [14] and Twitter's Fatcache [56]. These cache systems typically take commercial SSDs and adopt a Memcached-like scheme to store and manage key-value cache data in flash. Such a practice, though simple, is inefficient due to the huge semantic gap between the key-value cache manager and the underlying flash devices. In this article, we advocate to reconsider the cache system design and directly open device-level details of the underlying flash storage for key-value caching. 
We propose an enhanced flash-aware key-value cache manager, which consists of a novel unified address mapping module, an integrated garbage collection policy, a dynamic over-provisioning space management, and a customized wear-leveling policy, to directly drive the flash management. A thin intermediate library layer provides a slab-based abstraction of low-level flash memory space and an API interface for directly and easily operating flash devices. A special flash memory SSD hardware that exposes flash physical details is adopted to store key-value items. This co-design approach bridges the semantic gap and well connects the two layers together, which allows us to leverage both the domain knowledge of key-value caches and the unique device properties. In this way, we can maximize the efficiency of key-value caching on flash devices while minimizing its weakness. We implemented a prototype, called DIDACache, based on the Open-Channel SSD platform. Our experiments on real hardware show that we can significantly increase the throughput by 35.5\%, reduce the latency by 23.6\%, and remove unnecessary erase operations by 28\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "26", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Luo:2018:WER, author = "Huizhang Luo and Qing Liu and Jingtong Hu and Qiao Li and Liang Shi and Qingfeng Zhuge and Edwin H.-M. 
Sha", title = "Write Energy Reduction for {PCM} via Pumping Efficiency Improvement", journal = j-TOS, volume = "14", number = "3", pages = "27:1--27:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3200139", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The emerging Phase Change Memory (PCM) is considered to be a promising candidate to replace DRAM as the next generation main memory due to its higher scalability and lower leakage power. However, the high write power consumption has become a major challenge in adopting PCM as main memory. In addition to the fact that writing to PCM cells requires high write current and voltage, current loss in the charge pumps also contributes a large percentage of high power consumption. The pumping efficiency of a PCM chip is a concave function of the write current. Leveraging the characteristics of the concave function, the overall pumping efficiency can be improved if the write current is uniform. In this article, we propose a peak-to-average (PTA) write scheme, which smooths the write current fluctuation by regrouping write units. In particular, we calculate the current requirements for each write unit by their values when they are evicted from the last level cache (LLC). When the write units are waiting in the memory controller, we regroup the write units by LLC-assisted PTA to reach the current-uniform goal. Experimental results show that LLC-assisted PTA achieved 13.4\% of overall energy saving compared to the baseline.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "27", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Yan:2018:RRB, author = "Wenrui Yan and Jie Yao and Qiang Cao and Changsheng Xie and Hong Jiang", title = "{ROS}: a Rack-based Optical Storage System with Inline Accessibility for Long-Term Data Preservation", journal = j-TOS, volume = "14", number = "3", pages = "28:1--28:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3231599", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The combination of the explosive growth in digital data and the demand to preserve much of these data in the long term has made it imperative to find a more cost-effective way than HDD arrays and a more easily accessible way than tape libraries to store massive amounts of data. While modern optical discs are capable of guaranteeing more than 50-year data preservation without media replacement, individual optical discs' lack of the performance and capacity relative to HDDs or tapes has significantly limited their use in datacenters. This article presents a Rack-scale Optical disc library System, or ROS in short, which provides a PB-level total capacity and inline accessibility on thousands of optical discs built within a 42U Rack. A rotatable roller and robotic arm separating and fetching discs are designed to improve disc placement density and simplify the mechanical structure. A hierarchical storage system based on SSDs, hard disks, and optical discs is proposed to effectively hide the delay of mechanical operation. However, an optical library file system (OLFS) based on FUSE is proposed to schedule mechanical operation and organize data on the tiered storage with a POSIX user interface to provide an illusion of inline data accessibility. 
We further optimize OLFS by reducing unnecessary user/kernel context switches inheriting from legacy FUSE framework. We evaluate ROS on a few key performance metrics, including operation delays of the mechanical structure and software overhead in a prototype PB-level ROS system. The results show that ROS stacked on Samba and FUSE as network-attached storage (NAS) mode almost saturates the throughput provided by underlying samba via 10GbE network for external users, as well as in this scenario provides about 53ms file write and 15ms read latency, exhibiting its inline accessibility. Besides, ROS is able to effectively hide and virtualize internal complex operational behaviors and be easily deployable in datacenters.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "28", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Desnoyers:2018:ISI, author = "Peter Desnoyers and Eyal de Lara", title = "Introduction to the Special Issue on {SYSTOR 2017}", journal = j-TOS, volume = "14", number = "4", pages = "29:1--29:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3287097", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "29", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Trivedi:2018:FFN, author = "Animesh Trivedi and Nikolas Ioannou and Bernard Metzler and Patrick Stuedi and Jonas Pfefferle and Kornilios Kourtis and Ioannis Koltsidas and Thomas R. 
Gross", title = "{FlashNet}: Flash\slash Network Stack Co-Design", journal = j-TOS, volume = "14", number = "4", pages = "30:1--30:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3239562", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "During the past decade, network and storage devices have undergone rapid performance improvements, delivering ultra-low latency and several Gbps of bandwidth. Nevertheless, current network and storage stacks fail to deliver this hardware performance to the applications, often due to the loss of I/O efficiency from stalled CPU performance. While many efforts attempt to address this issue solely on either the network or the storage stack, achieving high-performance for networked-storage applications requires a holistic approach that considers both. In this article, we present FlashNet, a software I/O stack that unifies high-performance network properties with flash storage access and management. FlashNet builds on RDMA principles and abstractions to provide a direct, asynchronous, end-to-end data path between a client and remote flash storage. The key insight behind FlashNet is to co-design the stack's components (an RDMA controller, a flash controller, and a file system) to enable cross-stack optimizations and maximize I/O efficiency. In micro-benchmarks, FlashNet improves 4kB network I/O operations per second (IOPS) by 38.6\% to 1.22M, decreases access latency by 43.5\% to 50.4 $ \mu $ s, and prolongs the flash lifetime by 1.6--5.9$ \times $ for writes. We illustrate the capabilities of FlashNet by building a Key-Value store and porting a distributed data store that uses RDMA on it. 
The use of FlashNet's RDMA API improves the performance of KV store by $ 2 \times $ and requires minimum changes for the ported data store to access remote flash devices.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "30", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Guz:2018:PCN, author = "Zvika Guz and Harry (Huan) Li and Anahita Shayesteh and Vijay Balakrishnan", title = "Performance Characterization of {NVMe}-over-Fabrics Storage Disaggregation", journal = j-TOS, volume = "14", number = "4", pages = "31:1--31:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3239563", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Storage disaggregation separates compute and storage to different nodes to allow for independent resource scaling and, thus, better hardware resource utilization. While disaggregation of hard-drives storage is a common practice, NVMe-SSD (i.e., PCIe-based SSD) disaggregation is considered more challenging. This is because SSDs are significantly faster than hard drives, so the latency overheads (due to both network and CPU processing) as well as the extra compute cycles needed for the offloading stack become much more pronounced. In this work, we characterize the overheads of NVMe-SSD disaggregation. We show that NVMe-over-Fabrics (NVMe-oF)---a recently released remote storage protocol specification---reduces the overheads of remote access to a bare minimum, thus greatly increasing the cost-efficiency of Flash disaggregation. 
Specifically, while recent work showed that SSD storage disaggregation via iSCSI degrades application-level throughput by 20\%, we report on negligible performance degradation with NVMe-oF---both when using stress-tests as well as with a more-realistic KV-store workload.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "31", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Xie:2018:EIP, author = "Wei Xie and Yong Chen and Philip C. Roth", title = "Exploiting Internal Parallelism for Address Translation in Solid-State Drives", journal = j-TOS, volume = "14", number = "4", pages = "32:1--32:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3239564", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Solid-state Drives (SSDs) have changed the landscape of storage systems and present a promising storage solution for data-intensive applications due to their low latency, high bandwidth, and low power consumption compared to traditional hard disk drives. SSDs achieve these desirable characteristics using internal parallelism---parallel access to multiple internal flash memory chips---and a Flash Translation Layer (FTL) that determines where data are stored on those chips so that they do not wear out prematurely. However, current state-of-the-art cache-based FTLs like the Demand-based Flash Translation Layer (DFTL) do not allow IO schedulers to take full advantage of internal parallelism, because they impose a tight coupling between the logical-to-physical address translation and the data access. To address this limitation, we introduce a new FTL design called Parallel-DFTL that works with the DFTL to decouple address translation operations from data accesses. 
Parallel-DFTL separates address translation and data access operations into different queues, allowing the SSD to use concurrent flash accesses for both types of operations. We also present a Parallel-LRU cache replacement algorithm to improve the concurrency of address translation operations. To compare Parallel-DFTL against existing FTL approaches, we present a Parallel-DFTL performance model and compare its predictions against those for DFTL and an ideal page-mapping approach. We also implemented the Parallel-DFTL approach in an SSD simulator using real device parameters, and used trace-driven simulation to evaluate Parallel-DFTL's efficacy. Our evaluation results show that Parallel-DFTL improved the overall performance by up to 32\% for the real IO workloads we tested, and by up to two orders of magnitude with synthetic test workloads. We also found that Parallel-DFTL is able to achieve reasonable performance with a very small cache size and that it provides the best benefit for those workloads with large request size or with high write ratio.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "32", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Pletka:2018:MNG, author = "Roman Pletka and Ioannis Koltsidas and Nikolas Ioannou and Sasa Tomi{\'c} and Nikolaos Papandreou and Thomas Parnell and Haralampos Pozidis and Aaron Fry and Tim Fisher", title = "Management of Next-Generation {NAND} Flash to Achieve Enterprise-Level Endurance and Latency Targets", journal = j-TOS, volume = "14", number = "4", pages = "33:1--33:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3241060", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Despite its widespread use in consumer devices and enterprise storage systems, NAND flash faces a growing number of challenges. While technology advances have helped to increase the storage density and reduce costs, they have also led to reduced endurance and larger block variations, which cannot be compensated solely by stronger ECC or read-retry schemes but have to be addressed holistically. Our goal is to enable low-cost NAND flash in enterprise storage for cost efficiency. We present novel flash-management approaches that reduce write amplification, achieve better wear leveling, and enhance endurance without sacrificing performance. We introduce block calibration, a technique to determine optimal read-threshold voltage levels that minimize error rates, and novel garbage-collection as well as data-placement schemes that alleviate the effects of block health variability and show how these techniques complement one another and thereby achieve enterprise storage requirements. By combining the proposed schemes, we improve endurance by up to 15$ \times $ compared to the baseline endurance of NAND flash without using a stronger ECC scheme. 
The flash-management algorithms presented herein were designed and implemented in simulators, hardware test platforms, and eventually in the flash controllers of production enterprise all-flash arrays. Their effectiveness has been validated across thousands of customer deployments since 2015.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "33", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Marmol:2018:LSA, author = "Leonardo Marmol and Mohammad Chowdhury and Raju Rangaswami", title = "{LibPM}: Simplifying Application Usage of Persistent Memory", journal = j-TOS, volume = "14", number = "4", pages = "34:1--34:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3278141", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Persistent Memory devices present properties that are uniquely different from prior technologies for which applications have been built. Unfortunately, the conventional approach to building applications fail to either efficiently utilize these new devices or provide programmers a seamless development experience. We have built LibPM, a Persistent Memory Library that implements an easy-to-use container abstraction for consuming PM. LibPM's containers are data hosting units that can store arbitrarily complex data types while preserving their integrity and consistency. Consequently, LibPM's containers provide a generic interface to applications, allowing applications to store and manipulate arbitrarily structured data with strong durability and consistency properties, all without having to navigate all the myriad pitfalls of programming PM directly. 
By providing a simple and high-performing transactional update mechanism, LibPM allows applications to manipulate persistent data at the speed of memory. The container abstraction and automatic persistent data discovery mechanisms within LibPM also simplify porting legacy applications to PM. From a performance perspective, LibPM closely matches and often exceeds the performance of state-of-the-art application libraries for PM. For instance, LibPM's performance is 195$ \times $ better for write intensive workloads and 2.6$ \times $ better for read intensive workloads when compared with the state-of-the-art Pmem.IO persistent memory library.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "34", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Gatla:2018:TRF, author = "Om Rameshwar Gatla and Mai Zheng and Muhammad Hameed and Viacheslav Dubeyko and Adam Manzanares and Filip Blagojevic and Cyril Guyot and Robert Mateescu", title = "Towards Robust File System Checkers", journal = j-TOS, volume = "14", number = "4", pages = "35:1--35:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3281031", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "File systems may become corrupted for many reasons despite various protection techniques. Therefore, most file systems come with a checker to recover the file system to a consistent state. However, existing checkers are commonly assumed to be able to complete the repair without interruption, which may not be true in practice. In this work, we demonstrate via fault injection experiments that checkers of widely used file systems (EXT4, XFS, BtrFS, and F2FS) may leave the file system in an uncorrectable state if the repair procedure is interrupted unexpectedly. 
To address the problem, we first fix the ordering issue in the undo logging of e2fsck and then build a general logging library (i.e., rfsck-lib) for strengthening checkers. To demonstrate the practicality, we integrate rfsck-lib with existing checkers and create two new checkers: rfsck-ext, a robust checker for Ext-family file systems, and rfsck-xfs, a robust checker for XFS file systems, both of which require only tens of lines of modification to the original versions. Both rfsck-ext and rfsck-xfs are resilient to faults in our experiments. Also, both checkers incur reasonable performance overhead (i.e., up to 12\%) compared to the original unreliable versions. Moreover, rfsck-ext outperforms the patched e2fsck by up to nine times while achieving the same level of robustness.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "35", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Choi:2018:HFC, author = "Jin-Yong Choi and Eyee Hyun Nam and Yoon Jae Seong and Jin Hyuk Yoon and Sookwan Lee and Hong Seok Kim and Jeongsu Park and Yeong-Jae Woo and Sheayun Lee and Sang Lyul Min", title = "{HIL}: a Framework for Compositional {FTL} Development and Provably-Correct Crash Recovery", journal = j-TOS, volume = "14", number = "4", pages = "36:1--36:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3281030", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:49 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "We present a framework called Hierarchically Interacting Logs (HIL) for constructing Flash Translation Layers (FTLs). The main goal of the HIL framework is to heal the Achilles heel---the crash recovery---of FTLs (hence, its name). 
Nonetheless, the framework itself is general enough to encompass not only block-mapped and page-mapped FTLs but also many of their variants, including hybrid ones, because of its compositional nature. Crash recovery within the HIL framework proceeds in two phases: structural recovery and functional recovery. During the structural recovery, residual effects due to program operations ongoing at the time of the crash are eliminated in an atomic manner using shadow paging. During the functional recovery, operations that would have been performed if there had been no crash are replayed in a redo-only fashion. Both phases operate in an idempotent manner, preventing repeated crashes during recovery from causing any additional problems. We demonstrate the practicality of the proposed HIL framework by implementing a prototype and showing that its performance during normal execution and also during crash recovery is at least as good as those of state-of-the-art SSDs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "36", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{May:2019:LF, author = "Michael J. May and Etamar Laron and Khalid Zoabi and Havah Gerhardt", title = "On the Lifecycle of the File", journal = j-TOS, volume = "15", number = "1", pages = "1:1--1:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3295463", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3295463", abstract = "Users and Operating Systems (OSs) have vastly different views of files. OSs use files to persist data and structured information. To accomplish this, OSs treat files as named collections of bytes managed in hierarchical file systems. 
Despite their critical role in computing, little attention is paid to the lifecycle of the file, the evolution of file contents, or the evolution of file metadata. In contrast, users have rich mental models of files: they group files into projects, send data repositories to others, work on documents over time, and stash them aside for future use. Current OSs and Revision Control Systems ignore such mental models, persisting a selective, manually designated history of revisions. Preserving the mental model allows applications to better match how users view their files, making file processing and archiving tools more effective. We propose two mechanisms that OSs can adopt to better preserve the mental model: File Lifecycle Events (FLEs) that record a file's progression and Complex File Events (CFEs) that combine them into meaningful patterns. We present the Complex File Events Engine (CoFEE), which uses file system monitoring and an extensible rulebase (Drools) to detect FLEs and convert them into complex ones. CFEs are persisted in NoSQL stores for later querying.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Noh:2019:ATD, author = "Sam H. Noh", title = "{ACM TOS} Distinguished Reviewers", journal = j-TOS, volume = "15", number = "1", pages = "1:1--1:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3313879", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3313879", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "1e", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Luby:2019:LCS, author = "Michael Luby and Roberto Padovani and Thomas J. Richardson and Lorenz Minder and Pooja Aggarwal", title = "Liquid Cloud Storage", journal = j-TOS, volume = "15", number = "1", pages = "2:1--2:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3281276", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3281276", abstract = "A liquid system provides durable object storage based on spreading redundantly generated data across a network of hundreds to thousands of potentially unreliable storage nodes. A liquid system uses a combination of a large code, lazy repair, and flow storage organization. We show that a liquid system can be operated to enable flexible and essentially optimal combinations of storage durability, storage overhead, repair bandwidth usage, and access performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zhang:2019:LGF, author = "Yiming Zhang and Dongsheng Li and Ling Liu", title = "Leveraging Glocality for Fast Failure Recovery in Distributed {RAM} Storage", journal = j-TOS, volume = "15", number = "1", pages = "3:1--3:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3289604", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3289604", abstract = "Distributed RAM storage aggregates the RAM of servers in data center networks (DCN) to provide extremely high I/O performance for large-scale cloud systems. For quick recovery of storage server failures, MemCube [53] exploits the proximity of the BCube network to limit the recovery traffic to the recovery servers' 1-hop neighborhood. However, the previous design is applicable only to the symmetric BCube( n, k ) network with n$^{k + 1}$ nodes and has suboptimal recovery performance due to congestion and contention. To address these problems, in this article, we propose CubeX, which (i) generalizes the ``1-hop'' principle of MemCube for arbitrary cube-based networks and (ii) improves the throughput and recovery performance of RAM-based key-value (KV) store via cross-layer optimizations. At the core of CubeX is to leverage the glocality (= globality + locality) of cube-based networks: It scatters backup data across a large number of disks globally distributed throughout the cube and restricts all recovery traffic within the small local range of each server node. 
Our evaluation shows that CubeX not only efficiently supports RAM-based KV store for cube-based networks but also significantly outperforms MemCube and RAMCloud in both throughput and recovery time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Cao:2019:TTA, author = "Zhichao Cao and Hao Wen and Xiongzi Ge and Jingwei Ma and Jim Diehl and David H. C. Du", title = "{TDDFS}: a Tier-Aware Data Deduplication-Based File System", journal = j-TOS, volume = "15", number = "1", pages = "4:1--4:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3295461", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3295461", abstract = "With the rapid increase in the amount of data produced and the development of new types of storage devices, storage tiering continues to be a popular way to achieve a good tradeoff between performance and cost-effectiveness. In a basic two-tier storage system, a storage tier with higher performance and typically higher cost (the fast tier) is used to store frequently-accessed (active) data while a large amount of less-active data are stored in the lower-performance and low-cost tier (the slow tier). Data are migrated between these two tiers according to their activity. In this article, we propose a Tier-aware Data Deduplication-based File System, called TDDFS, which can operate efficiently on top of a two-tier storage environment. Specifically, to achieve better performance, nearly all file operations are performed in the fast tier. To achieve higher cost-effectiveness, files are migrated from the fast tier to the slow tier if they are no longer active, and this migration is done with data deduplication. 
The distinctiveness of our design is that it maintains the non-redundant (unique) chunks produced by data deduplication in both tiers if possible. When a file is reloaded (called a reloaded file) from the slow tier to the fast tier, if some data chunks of the file already exist in the fast tier, then the data migration of these chunks from the slow tier can be avoided. Our evaluation shows that TDDFS achieves close to the best overall performance among various file-tiering designs for two-tier storage systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Yadgar:2019:ISI, author = "Gala Yadgar and Donald E. Porter", title = "Introduction to the Special Issue on {ACM International Systems and Storage Conference (SYSTOR) 2018}", journal = j-TOS, volume = "15", number = "1", pages = "5:1--5:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3313898", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3313898", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Saad:2019:LPD, author = "Mohamed M. 
Saad and Roberto Palmieri and Binoy Ravindran", title = "{Lerna}: Parallelizing Dependent Loops Using Speculation", journal = j-TOS, volume = "15", number = "1", pages = "6:1--6:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3310368", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3310368", abstract = "We present Lerna, an end-to-end tool that automatically and transparently detects and extracts parallelism from data-dependent sequential loops. Lerna uses speculation combined with a set of techniques including code profiling, dependency analysis, instrumentation, and adaptive execution. Speculation is needed to avoid conservative actions and detect actual conflicts. Lerna targets applications that are hard-to-parallelize due to data dependency. Our experimental study involves the parallelization of 13 applications with data dependencies. Results on a 24-core machine show an average of 2.7$ \times $ speedup for micro-benchmarks and 2.5$ \times $ for the macro-benchmarks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Pei:2019:RPU, author = "Shuyi Pei and Jing Yang and Qing Yang", title = "{REGISTOR}: a Platform for Unstructured Data Processing Inside {SSD} Storage", journal = j-TOS, volume = "15", number = "1", pages = "7:1--7:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3310149", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3310149", abstract = "This article presents REGISTOR, a platform for regular expression grabbing inside storage. The main idea of Registor is accelerating regular expression (regex) search inside storage where large data set is stored, eliminating the I/O bottleneck problem. A special hardware engine for regex search is designed and augmented inside a flash SSD that processes data on-the-fly during data transmission from NAND flash to host. To make the speed of regex search match the internal bus speed of a modern SSD, a deep pipeline structure is designed in Registor hardware consisting of a file semantics extractor, matching candidates finder, regex matching units (REMUs), and results organizer. Furthermore, each stage of the pipeline makes the use of maximal parallelism possible. To make Registor readily usable by high-level applications, we have developed a set of APIs and libraries in Linux allowing Registor to process files in the SSD by recombining separate data blocks into files efficiently. A working prototype of Registor has been built in our newly designed NVMe-SSD. 
Extensive experiments and analyses have been carried out to show that Registor achieves high throughput, reduces the I/O bandwidth requirement by up to 97\%, and reduces CPU utilization by as much as 82\% for regex search in large datasets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Gunawi:2019:ISS, author = "Haryadi Gunawi and Benjamin Reed", title = "Introduction to the Special Section on the 2018 {USENIX} Annual Technical Conference {(ATC'18)}", journal = j-TOS, volume = "15", number = "2", pages = "8:1--8:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3322100", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3322100", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Hu:2019:TLF, author = "Yige Hu and Zhiting Zhu and Ian Neal and Youngjin Kwon and Tianyu Cheng and Vijay Chidambaram and Emmett Witchel", title = "{TxFS}: Leveraging File-system Crash Consistency to Provide {ACID} Transactions", journal = j-TOS, volume = "15", number = "2", pages = "9:1--9:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3318159", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3318159", abstract = "We introduce TxFS, a transactional file system that builds upon a file system's atomic-update mechanism such as journaling. 
Though prior work has explored a number of transactional file systems, TxFS has a unique set of properties: a simple API, portability across different hardware, high performance, low complexity (by building on the file-system journal), and full ACID transactions. We port SQLite, OpenLDAP, and Git to use TxFS and experimentally show that TxFS provides strong crash consistency while providing equal or better performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zhang:2019:CDS, author = "Yu Zhang and Jin Zhao and Xiaofei Liao and Hai Jin and Lin Gu and Haikun Liu and Bingsheng He and Ligang He", title = "{CGraph}: a Distributed Storage and Processing System for Concurrent Iterative Graph Analysis Jobs", journal = j-TOS, volume = "15", number = "2", pages = "10:1--10:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3319406", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3319406", abstract = "Distributed graph processing platforms usually need to handle massive Concurrent iterative Graph Processing (CGP) jobs for different purposes. However, existing distributed systems face high ratio of data access cost to computation for the CGP jobs, which incurs low throughput. We observed that there are strong spatial and temporal correlations among the data accesses issued by different CGP jobs, because these concurrently running jobs usually need to repeatedly traverse the shared graph structure for the iterative processing of each vertex. 
Based on this observation, this article proposes a distributed storage and processing system CGraph for the CGP jobs to efficiently handle the underlying static/evolving graph for high throughput. It uses a data-centric load-trigger-pushing model, together with several optimizations, to enable the CGP jobs to efficiently share the graph structure data in the cache/memory and their accesses by fully exploiting such correlations, where the graph structure data is decoupled from the vertex state associated with each job. It can deliver much higher throughput for the CGP jobs by effectively reducing their average ratio of data access cost to computation. Experimental results show that CGraph improves the throughput of the CGP jobs by up to 3.47$ \times $ in comparison with existing solutions on distributed platforms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zhu:2019:STS, author = "Tao Zhu and Zhuoyue Zhao and Feifei Li and Weining Qian and Aoying Zhou and Dong Xie and Ryan Stutsman and Haining Li and Huiqi Hu", title = "{SolarDB}: Toward a Shared-Everything Database on Distributed Log-Structured Storage", journal = j-TOS, volume = "15", number = "2", pages = "11:1--11:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3318158", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3318158", abstract = "Efficient transaction processing over large databases is a key requirement for many mission-critical applications. Although modern databases have achieved good performance through horizontal partitioning, their performance deteriorates when cross-partition distributed transactions have to be executed. 
This article presents SolarDB, a distributed relational database system that has been successfully tested at a large commercial bank. The key features of SolarDB include (1) a shared-everything architecture based on a two-layer log-structured merge-tree; (2) a new concurrency control algorithm that works with the log-structured storage, which ensures efficient and non-blocking transaction processing even when the storage layer is compacting data among nodes in the background; and (3) fine-grained data access to effectively minimize and balance network communication within the cluster. According to our empirical evaluations on TPC-C, Smallbank, and a real-world workload, SolarDB outperforms the existing shared-nothing systems by up to 50x when there are close to or more than 5\% distributed transactions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Arpaci-Dusseau:2019:ISS, author = "Andrea Arpaci-Dusseau and Geoffrey M. Voelker", title = "Introduction to the Special Section on {OSDI'18}", journal = j-TOS, volume = "15", number = "2", pages = "12:1--12:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3322101", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3322101", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Zuo:2019:LHH, author = "Pengfei Zuo and Yu Hua and Jie Wu", title = "Level Hashing: a High-performance and Flexible-resizing Persistent Hashing Index Structure", journal = j-TOS, volume = "15", number = "2", pages = "13:1--13:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3322096", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3322096", abstract = "Non-volatile memory (NVM) technologies as persistent memory are promising candidates to complement or replace DRAM for building future memory systems, due to having the advantages of high density, low power, and non-volatility. In main memory systems, hashing index structures are fundamental building blocks to provide fast query responses. However, hashing index structures originally designed for dynamic random access memory (DRAM) become inefficient for persistent memory due to new challenges including hardware limitations of NVM and the requirement of data consistency. To address these challenges, this article proposes level hashing, a write-optimized and high-performance hashing index scheme with low-overhead consistency guarantee and cost-efficient resizing. Level hashing provides a sharing-based two-level hash table, which achieves constant-scale worst-case time complexity for search, insertion, deletion, and update operations, and rarely incurs extra NVM writes. To guarantee the consistency with low overhead, level hashing leverages log-free consistency schemes for deletion, insertion, and resizing operations, and an opportunistic log-free scheme for update operation. 
To cost-efficiently resize this hash table, level hashing leverages an in-place resizing scheme that only needs to rehash 1/3 of buckets instead of the entire table to expand a hash table and rehash 2/3 of buckets to shrink a hash table, thus significantly improving the resizing performance and reducing the number of rehashed buckets. Extensive experimental results show that the level hashing speeds up insertions by 1.4$ \times $-3.0$ \times $, updates by 1.2$ \times $-2.1$ \times $, expanding by over 4.3$ \times $, and shrinking by over 1.4$ \times $ while maintaining high search and deletion performance compared with state-of-the-art hashing schemes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Mohan:2019:CAS, author = "Jayashree Mohan and Ashlie Martinez and Soujanya Ponnapalli and Pandian Raju and Vijay Chidambaram", title = "{CrashMonkey} and {ACE}: Systematically Testing File-System Crash Consistency", journal = j-TOS, volume = "15", number = "2", pages = "14:1--14:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3320275", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3320275", abstract = "We present CrashMonkey and Ace, a set of tools to systematically find crash-consistency bugs in Linux file systems. CrashMonkey is a record-and-replay framework which tests a given workload on the target file system by simulating power-loss crashes while the workload is being executed, and checking if the file system recovers to a correct state after each crash. Ace automatically generates all the workloads to be run on the target file system. 
We build CrashMonkey and Ace based on a new approach to test file-system crash consistency: bounded black-box crash testing ( B$^3$ ). B$^3$ tests the file system in a black-box manner using workloads of file-system operations. Since the space of possible workloads is infinite, B$^3$ bounds this space based on parameters such as the number of file-system operations or which operations to include, and exhaustively generates workloads within this bounded space. B$^3$ builds upon insights derived from our study of crash-consistency bugs reported in Linux file systems in the last 5 years. We observed that most reported bugs can be reproduced using small workloads of three or fewer file-system operations on a newly created file system, and that all reported bugs result from crashes after fsync()-related system calls. CrashMonkey and Ace are able to find 24 out of the 26 crash-consistency bugs reported in the last 5 years. Our tools also revealed 10 new crash-consistency bugs in widely used, mature Linux file systems, 7 of which existed in the kernel since 2014. Additionally, our tools found a crash-consistency bug in a verified file system, FSCQ. The new bugs result in severe consequences like broken rename atomicity, loss of persisted files and directories, and data loss.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Vangoor:2019:PRU, author = "Bharath Kumar Reddy Vangoor and Prafful Agarwal and Manu Mathew and Arun Ramachandran and Swaminathan Sivaraman and Vasily Tarasov and Erez Zadok", title = "Performance and Resource Utilization of {FUSE} User-Space File Systems", journal = j-TOS, volume = "15", number = "2", pages = "15:1--15:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3310148", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3310148", abstract = "Traditionally, file systems were implemented as part of operating systems kernels, which provide a limited set of tools and facilities to a programmer. As the complexity of file systems grew, many new file systems began being developed in user space. Low performance is considered the main disadvantage of user-space file systems but the extent of this problem has never been explored systematically. As a result, the topic of user-space file systems remains rather controversial: while some consider user-space file systems a ``toy'' not to be used in production, others develop full-fledged production file systems in user space. In this article, we analyze the design and implementation of a well-known user-space file system framework, FUSE, for Linux. We characterize its performance and resource utilization for a wide range of workloads. We present FUSE performance and also resource utilization with various mount and configuration options, using 45 different workloads that were generated using Filebench on two different hardware configurations. 
We instrumented FUSE to extract useful statistics and traces, which helped us analyze its performance bottlenecks and present our analysis results. Our experiments indicate that depending on the workload and hardware used, performance degradation (throughput) caused by FUSE can be completely imperceptible or as high as -83\%, even when optimized; and latencies of FUSE file system operations can be increased from none to 4$ \times $ when compared to Ext4. On the resource utilization side, FUSE can increase relative CPU utilization by up to 31\% and underutilize disk bandwidth by as much as -80\% compared to Ext4, though for many data-intensive workloads the impact was statistically indistinguishable. Our conclusion is that user-space file systems can indeed be used in production (non-``toy'') settings, but their applicability depends on the expected workloads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wen:2019:CTS, author = "Weidong Wen and Yang Li and Wenhai Li and Lingfeng Deng and Yanxiang He", title = "{CORES}: Towards Scan-Optimized Columnar Storage for Nested Records", journal = j-TOS, volume = "15", number = "3", pages = "16:1--16:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3321704", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3321704", abstract = "The relatively high cost of record deserialization is increasingly becoming the bottleneck of column-based storage systems in tree-structured applications [58]. 
Due to record transformation in the storage layer, unnecessary processing costs derived from fields and rows irrelevant to queries may be very heavy in nested schemas, significantly wasting the computational resources in large-scale analytical workloads. This leads to the question of how to reduce both the deserialization and IO costs of queries with highly selective filters following arbitrary paths in a nested schema. We present CORES (Column-Oriented Regeneration Embedding Scheme) to push highly selective filters down into column-based storage engines, where each filter consists of several filtering conditions on a field. By applying highly selective filters in the storage layer, we demonstrate that both the deserialization and IO costs could be significantly reduced. We show how to introduce fine-grained composition on filtering results. We generalize this technique by two pair-wise operations, rollup and drilldown, such that a series of conjunctive filters can effectively deliver their payloads in nested schema. The proposed methods are implemented on an open-source platform. For practical purposes, we highlight how to build a column storage engine and how to drive a query efficiently based on a cost model. We apply this design to the nested relational model especially when hierarchical entities are frequently required by ad hoc queries. The experiments, including a real workload and the modified TPCH benchmark, demonstrate that CORES improves the performance by 0.7$ \times $--26.9$ \times $ compared to state-of-the-art platforms in scan-intensive workloads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "16", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Lu:2019:MSO, author = "Youyou Lu and Jiwu Shu and Jiacheng Zhang", title = "Mitigating Synchronous {I/O} Overhead in File Systems on Open-Channel {SSDs}", journal = j-TOS, volume = "15", number = "3", pages = "17:1--17:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3319369", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3319369", abstract = "Synchronous I/O has long been a design challenge in file systems. Although open-channel solid state drives (SSDs) provide better performance and endurance to file systems, they still suffer from synchronous I/Os due to the amplified writes and worse hot/cold data grouping. The reason lies in the controversy design choices between flash write and read/erase operations. While fine-grained logging improves performance and endurance in writes, it hurts indexing and data grouping efficiency in read and erase operations. In this article, we propose a flash-friendly data layout by introducing a built-in persistent staging layer to provide balanced read, write, and garbage collection performance. Based on this, we design a new flash file system (FS) named StageFS, which decouples the content and structure updates. Content updates are logically logged to the staging layer in a persistence-efficient way, which achieves better write performance and lower write amplification. The updated contents are reorganized into the normal data area for structure updates, with improved hot/cold grouping and in a page-level indexing way, which is more friendly to read and garbage collection operations. 
Evaluation results show that, compared to recent flash-friendly file system (F2FS), StageFS effectively improves performance by up to 211.4\% and achieves low garbage collection overhead for workloads with frequent synchronization.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "17", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Li:2019:ESS, author = "Yin Li and Xubin Chen and Ning Zheng and Jingpeng Hao and Tong Zhang", title = "An Exploratory Study on Software-Defined Data Center Hard Disk Drives", journal = j-TOS, volume = "15", number = "3", pages = "18:1--18:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3319405", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3319405", abstract = "This article presents a design framework aiming to reduce mass data storage cost in data centers. Its underlying principle is simple: Assume one may noticeably reduce the HDD manufacturing cost by significantly (i.e., at least several orders of magnitude) relaxing raw HDD reliability, which ensures the eventual data storage integrity via low-cost system-level redundancy. This is called system-assisted HDD bit cost reduction. To better utilize both capacity and random IOPS of HDDs, it is desirable to mix data with complementary requirements on capacity and random IOPS in each HDD. Nevertheless, different capacity and random IOPS requirements may demand different raw HDD reliability vs. bit cost trade-offs and hence different forms of system-assisted bit cost reduction. This article presents a software-centric design framework to realize data-adaptive system-assisted bit cost reduction for data center HDDs. 
Implementation is solely handled by the filesystem and demands only minor change of the error correction coding (ECC) module inside HDDs. Hence, it is completely transparent to all the other components in the software stack (e.g., applications, OS kernel, and drivers) and keeps fundamental HDD design practice (e.g., firmware, media, head, and servo) intact. We carried out analysis and experiments to evaluate its implementation feasibility and effectiveness. We integrated the design techniques into ext4 to further quantitatively measure its impact on system speed performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "18", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Xie:2019:ZZB, author = "Xuchao Xie and Liquan Xiao and David H. C. Du", title = "{ZoneTier}: a Zone-based Storage Tiering and Caching Co-design to Integrate {SSDs} with {SMR} Drives", journal = j-TOS, volume = "15", number = "3", pages = "19:1--19:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3335548", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3335548", abstract = "Integrating solid-state drives (SSDs) and host-aware shingled magnetic recording (HA-SMR) drives can potentially build a cost-effective high-performance storage system. However, existing SSD tiering and caching designs in such a hybrid system are not fully matched with the intrinsic properties of HA-SMR drives due to their lacking consideration of how to handle non-sequential writes (NSWs). We propose ZoneTier, a zone-based storage tiering and caching co-design, to effectively control all the NSWs by leveraging the host-aware property of HA-SMR drives. 
ZoneTier exploits real-time data layout of SMR zones to optimize zone placement, reshapes NSWs generated from zone demotions to SMR preferred sequential writes, and transforms the inevitable NSWs to cleaning-friendly write traffics for SMR zones. ZoneTier can be easily extended to match host-managed SMR drives using proactive cleaning policy. We implemented a prototype of ZoneTier with user space data management algorithms and real SSD and HA-SMR drives, which are manipulated by the functions provided by libzbc and libaio. Our experiments show that ZoneTier can reduce zone relocation overhead by 29.41\% on average, shorten performance recovery time of HA-SMR drives from cleaning by up to 33.37\%, and improve performance by up to 32.31\% than existing hybrid storage designs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "19", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Li:2019:EEU, author = "Yongkun Li and Helen H. W. Chan and Patrick P. C. Lee and Yinlong Xu", title = "Enabling Efficient Updates in {KV} Storage via Hashing: Design and Performance Evaluation", journal = j-TOS, volume = "15", number = "3", pages = "20:1--20:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3340287", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3340287", abstract = "Persistent key-value (KV) stores mostly build on the Log-Structured Merge (LSM) tree for high write performance, yet the LSM-tree suffers from the inherently high I/O amplification. KV separation mitigates I/O amplification by storing only keys in the LSM-tree and values in separate storage. 
However, the current KV separation design remains inefficient under update-intensive workloads due to its high garbage collection (GC) overhead in value storage. We propose HashKV, which aims for high update performance atop KV separation under update-intensive workloads. HashKV uses hash-based data grouping, which deterministically maps values to storage space to make both updates and GC efficient. We further relax the restriction of such deterministic mappings via simple but useful design extensions. We extensively evaluate various design aspects of HashKV. We show that HashKV achieves 4.6$ \times $ update throughput and 53.4\% less write traffic compared to the current KV separation design. In addition, we demonstrate that we can integrate the design of HashKV with state-of-the-art KV stores and improve their respective performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "20", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Wang:2019:AAD, author = "Ji Wang and Weidong Bao and Lei Zheng and Xiaomin Zhu and Philip S. Yu", title = "An Attention-augmented Deep Architecture for Hard Drive Status Monitoring in Large-scale Storage Systems", journal = j-TOS, volume = "15", number = "3", pages = "21:1--21:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3340290", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Sep 21 07:58:50 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3340290", abstract = "Data centers equipped with large-scale storage systems are critical infrastructures in the era of big data. The enormous amount of hard drives in storage systems magnify the failure probability, which may cause tremendous loss for both data service users and providers. 
Despite a set of reactive fault-tolerant measures such as RAID, it is still a tough issue to enhance the reliability of large-scale storage systems. Proactive prediction is an effective method to avoid possible hard-drive failures in advance. A series of models based on the SMART statistics have been proposed to predict impending hard-drive failures. Nonetheless, there remain some serious yet unsolved challenges like the lack of explainability of prediction results. To address these issues, we carefully analyze a dataset collected from a real-world large-scale storage system and then design an attention-augmented deep architecture for hard-drive health status assessment and failure prediction. The deep architecture, composed of a feature integration layer, a temporal dependency extraction layer, an attention layer, and a classification layer, cannot only monitor the status of hard drives but also assist in failure cause diagnoses. The experiments based on real-world datasets show that the proposed deep architecture is able to assess the hard-drive status and predict the impending failures accurately. In addition, the experimental results demonstrate that the attention-augmented deep architecture can reveal the degradation progression of hard drives automatically and assist administrators in tracing the cause of hard drive failures.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "21", fjournal = "ACM Transactions on Storage", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960", } @Article{Anonymous:2020:EM, author = "Anonymous", title = "{EIC} Message", journal = j-TOS, volume = "15", number = "4", pages = "1--2", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372345", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 6 08:15:19 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372345", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Merchant:2020:ISI, author = "Arif Merchant and Hakim Weatherspoon", title = "Introduction to the Special Issue on {USENIX FAST 2019}", journal = j-TOS, volume = "15", number = "4", pages = "22e:1--22e:1", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372347", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 6 08:15:19 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372347", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "22e", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Sivathanu:2020:ICF, author = "Muthian Sivathanu and Midhul Vuppalapati and Bhargav S. 
Gulavani and Kaushik Rajan and Jyoti Leeka and Jayashree Mohan and Piyus Kedia", title = "{INSTalytics}: Cluster Filesystem Co-design for Big-data Analytics", journal = j-TOS, volume = "15", number = "4", pages = "23:1--23:30", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3369738", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 6 08:15:19 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3369738", abstract = "We present the design, implementation, and evaluation of INSTalytics, a co-designed stack of a cluster file system and the compute layer, for efficient big-data analytics in large-scale data centers. INSTalytics amplifies the well-known benefits of data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "23", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Harnik:2020:SVC, author = "Danny Harnik and Moshik Hershcovitch and Yosef Shatsky and Amir Epstein and Ronen Kat", title = "Sketching Volume Capacities in Deduplicated Storage", journal = j-TOS, volume = "15", number = "4", pages = "24:1--24:23", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3369737", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 6 08:15:19 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3369737", abstract = "The adoption of deduplication in storage systems has introduced significant new challenges for storage management. Specifically, the physical capacities associated with volumes are no longer readily available. In this work, we introduce a new approach \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "24", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kesavan:2020:CFE, author = "Ram Kesavan and Matthew Curtis-Maury and Vinay Devadas and Kesari Mishra", title = "Countering Fragmentation in an Enterprise Storage System", journal = j-TOS, volume = "15", number = "4", pages = "25:1--25:35", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3366173", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 6 08:15:19 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3366173", abstract = "As a file system ages, it can experience multiple forms of fragmentation. Fragmentation of the free space in the file system can lower write performance and subsequent read performance. Client operations as well as internal operations, such as \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "25", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Xie:2020:COB, author = "Bing Xie and Sarp Oral and Christopher Zimmer and Jong Youl Choi and David Dillow and Scott Klasky and Jay Lofstead and Norbert Podhorszki and Jeffrey S. 
Chase", title = "Characterizing Output Bottlenecks of a Production Supercomputer: Analysis and Implications", journal = j-TOS, volume = "15", number = "4", pages = "26:1--26:39", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3335205", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 6 08:15:19 MST 2020", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/super.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3335205", abstract = "This article studies the I/O write behaviors of the Titan supercomputer and its Lustre parallel file stores under production load. The results can inform the design, deployment, and configuration of file systems along with the design of I/O software in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "26", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhang:2020:DDD, author = "Guangyan Zhang and Zhufan Wang and Xiaosong Ma and Songlin Yang and Zican Huang and Weimin Zheng", title = "Determining Data Distribution for Large Disk Enclosures with {$3$-D} Data Templates", journal = j-TOS, volume = "15", number = "4", pages = "27:1--27:38", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3342858", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 6 08:15:19 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3342858", abstract = "Conventional RAID solutions with fixed layouts partition large disk enclosures so that each RAID group uses its own disks exclusively. This achieves good performance isolation across underlying disk groups, at the cost of disk under-utilization and slow \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "27", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kang:2020:LVC, author = "Dong Hyun Kang and Sang-Won Lee and Young Ik Eom", title = "{LDJ}: Version Consistency Is Almost Free on Commercial Storage Devices", journal = j-TOS, volume = "15", number = "4", pages = "28:1--28:20", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3365918", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 6 08:15:19 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365918", abstract = "In this article, we propose a simple but practical and efficient optimization scheme for journaling in ext4, called lightweight data journaling (LDJ). By compressing journaled data prior to writing, LDJ can perform comparable to or even faster than the default ordered journaling (OJ) mode in ext4 on top of both HDDs and flash storage devices, while still guaranteeing the version consistency of the data journaling (DJ) mode. This surprising result can be explained with three main reasons. First, on modern storage devices, the sequential write pattern dominating in DJ mode is more and more high-performant than the random one in OJ mode. Second, the compression significantly reduces the amount of journal writes, which will in turn make the write completion faster and prolong the lifespan of storage devices. Third, the compression also enables the atomicity of each journal write without issuing an intervening FLUSH command between journal data blocks and commit block, thus halving the number of costly FLUSH calls in LDJ. We have prototyped our LDJ by slightly modifying the existing ext4 with jbd2 for journaling and also e2fsck for recovery; less than 300 lines of source code were changed. Also, we carried out a comprehensive evaluation using four standard benchmarks and three real applications. 
Our evaluation results clearly show that LDJ outperforms the OJ mode by up to $ 9.6 \times $ on the real applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "28", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kumar:2020:GDS, author = "Pradeep Kumar and H. Howie Huang", title = "{GraphOne}: a Data Store for Real-time Analytics on Evolving Graphs", journal = j-TOS, volume = "15", number = "4", pages = "29:1--29:40", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3364180", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 6 08:15:19 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3364180", abstract = "There is a growing need to perform a diverse set of real-time analytics (batch and stream analytics) on evolving graphs to deliver the values of big data to users. The key requirement from such applications is to have a data store to support their \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "29", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Malkhi:2020:ISS, author = "Dahlia Malkhi and Dan Tsafrir", title = "Introduction to the Special Section on {USENIX ATC 2019}", journal = j-TOS, volume = "16", number = "1", pages = "1:1--1:1", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3383194", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Apr 8 11:43:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3383194", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Jaffer:2020:RMF, author = "Shehbaz Jaffer and Stathis Maneas and Andy Hwang and Bianca Schroeder", title = "The Reliability of Modern File Systems in the {Face} of {SSD} Errors", journal = j-TOS, volume = "16", number = "1", pages = "2:1--2:28", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3375553", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Apr 8 11:43:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3375553", abstract = "As solid state drives (SSDs) are increasingly replacing hard disk drives, the reliability of storage systems depends on the failure modes of SSDs and the ability of the file system layered on top to handle these failure modes. While the classical paper on IRON File Systems provides a thorough study of the failure policies of three file systems common at the time, we argue that 13 years later it is time to revisit file system reliability with SSDs and their reliability characteristics in mind, based on modern file systems that incorporate journaling, copy-on-write, and log-structured approaches and are optimized for flash. This article presents a detailed study, spanning ext4, Btrfs, and F2FS, and covering a number of different SSD error modes. We develop our own fault injection framework and explore over 1,000 error cases. Our results indicate that 16\% of these cases result in a file system that cannot be mounted or even repaired by its system checker. We also identify the key file system metadata structures that can cause such failures, and, finally, we recommend some design guidelines for file systems that are deployed on top of SSDs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kuszmaul:2020:ELF, author = "Bradley C. Kuszmaul and Matteo Frigo and Justin Mazzola Paluska and Alexander (Sasha) Sandler", title = "Everyone Loves File: {Oracle File Storage Service}", journal = j-TOS, volume = "16", number = "1", pages = "3:1--3:29", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3377877", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Apr 8 11:43:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3377877", abstract = "Oracle File Storage Service (FSS) is an elastic filesystem provided as a managed NFS service. A pipelined Paxos implementation underpins a scalable block store that provides linearizable multipage limited-size transactions. Above the block store, a scalable B-tree holds filesystem metadata and provides linearizable multikey limited-size transactions. Self-validating B-tree nodes and housekeeping operations performed as separate transactions allow each key in a B-tree transaction to require only one page in the underlying block transaction. The filesystem provides snapshots by using versioned key-value pairs. The system is programmed using a nonblocking lock-free programming style. Presentation servers maintain no persistent local state making them scalable and easy to failover. A non-scalable Paxos-replicated hash table holds configuration information required to bootstrap the system. An additional B-tree provides conversational multi-key minitransactions for control-plane information. The system throughput can be predicted by comparing an estimate of the network bandwidth needed for replication to the network bandwidth provided by the hardware. Latency on an unloaded system is about 4 times higher than a Linux NFS server backed by NVMe, reflecting the cost of replication. 
FSS has been in production since January 2018 and holds tens of thousands of customer file systems comprising many petabytes of data.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Li:2020:ILE, author = "Jingwei Li and Patrick P. C. Lee and Chufeng Tan and Chuan Qin and Xiaosong Zhang", title = "Information Leakage in Encrypted Deduplication via Frequency Analysis: Attacks and Defenses", journal = j-TOS, volume = "16", number = "1", pages = "4:1--4:30", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3365840", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Apr 8 11:43:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365840", abstract = "Encrypted deduplication combines encryption and deduplication to simultaneously achieve both data security and storage efficiency. State-of-the-art encrypted deduplication systems mainly build on deterministic encryption to preserve deduplication effectiveness. However, such deterministic encryption reveals the underlying frequency distribution of the original plaintext chunks. This allows an adversary to launch frequency analysis against the ciphertext chunks and infer the content of the original plaintext chunks. In this article, we study how frequency analysis affects information leakage in encrypted deduplication, from both attack and defense perspectives. Specifically, we target backup workloads and propose a new inference attack that exploits chunk locality to increase the coverage of inferred chunks. We further combine the new inference attack with the knowledge of chunk sizes and show its attack effectiveness against variable-size chunks. 
We conduct trace-driven evaluation on both real-world and synthetic datasets and show that our proposed attacks infer a significant fraction of plaintext chunks under backup workloads. To defend against frequency analysis, we present two defense approaches, namely MinHash encryption and scrambling. Our trace-driven evaluation shows that our combined MinHash encryption and scrambling scheme effectively mitigates the severity of the inference attacks, while maintaining high storage efficiency and incurring limited metadata access overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhang:2020:CFF, author = "Shuanglong Zhang and Robert Roy and Leah Rumancik and An-I Andy Wang", title = "The Composite-File File System: Decoupling One-to-One Mapping of Files and Metadata for Better Performance", journal = j-TOS, volume = "16", number = "1", pages = "5:1--5:18", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3366684", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Apr 8 11:43:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3366684", abstract = "The design and implementation of traditional file systems typically use the one-to-one mapping of logical files to their physical metadata representations. File system optimizations generally follow this rigid mapping and miss opportunities for an entire class of optimizations. We designed, implemented, and evaluated a composite-file file system, which allows many-to-one mappings of files to metadata. Through exploring different mapping strategies, our empirical evaluation shows up to a 27\% performance improvement under web server and software development workloads, for both disks and SSDs. 
This result demonstrates that our approach of relaxing file-to-metadata mapping is promising.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhang:2020:PEE, author = "Yiming Zhang and Huiba Li and Shengyun Liu and Jiawei Xu and Guangtao Xue", title = "{PBS}: an Efficient Erasure-Coded Block Storage System Based on Speculative Partial Writes", journal = j-TOS, volume = "16", number = "1", pages = "6:1--6:25", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3365839", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Apr 8 11:43:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365839", abstract = "Block storage provides virtual disks that can be mounted by virtual machines (VMs). Although erasure coding (EC) has been widely used in many cloud storage systems for its high efficiency and durability, current EC schemes cannot provide high-performance block storage for the cloud. This is because they introduce significant overhead to small write operations (which perform partial write to an entire EC group), whereas cloud-oblivious applications running on VMs are often small-write-intensive. We identify the root cause for the poor performance of partial writes in state-of-the-art EC schemes: for each partial write, they have to perform a time-consuming write-after-read operation that reads the current value of the data and then computes and writes the parity delta, which will be used to patch the parity in journal replay. In this article, we present a speculative partial write scheme (called PARIX) that supports fast small writes in erasure-coded storage systems. 
We transform the original formula of parity calculation to use the data deltas (between the current/original data values), instead of the parity deltas, to calculate the parities in journal replay. For each partial write, this allows PARIX to speculatively log only the new value of the data without reading its original value. For a series of $n$ partial writes to the same data, PARIX performs pure write (instead of write-after-read) for the last $ n - 1$ ones while only introducing a small penalty of an extra network round-trip time to the first one. Based on PARIX, we design and implement PARIX Block Storage (PBS), an efficient block storage system that provides high-performance virtual disk service for VMs running cloud-oblivious applications. PBS not only supports fast partial writes but also realizes efficient full writes, background journal replay, and fast failure recovery with strong consistency guarantees. Both microbenchmarks and trace-driven evaluation show that PBS provides efficient block storage and outperforms state-of-the-art EC-based systems by orders of magnitude.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhou:2020:FEC, author = "Tianli Zhou and Chao Tian", title = "Fast Erasure Coding for Data Storage: a Comprehensive Study of the Acceleration Techniques", journal = j-TOS, volume = "16", number = "1", pages = "7:1--7:24", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3375554", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Apr 8 11:43:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3375554", abstract = "Various techniques have been proposed in the literature to improve erasure code computation efficiency, including optimizing bitmatrix design and computation schedule, common XOR (exclusive-OR) operation reduction, caching management techniques, and vectorization techniques. These techniques were largely proposed individually, and, in this work, we seek to use them jointly. To accomplish this task, these techniques need to be thoroughly evaluated individually and their relation better understood. Building on extensive testing, we develop methods to systematically optimize the computation chain together with the underlying bitmatrix. This led to a simple design approach of optimizing the bitmatrix by minimizing a weighted computation cost function, and also a straightforward coding procedure following a computation schedule produced from the optimized bitmatrix to apply XOR-level vectorization. This procedure provides better performances than most existing techniques (e.g., those used in ISA-L and Jerasure libraries), and sometimes can even compete against well-known but less general codes such as EVENODD, RDP, and STAR codes.
One particularly important observation is that vectorizing the XOR operations is a better choice than directly vectorizing finite field operations, not only because of the flexibility in choosing finite field size and the better encoding throughput, but also its minimal migration efforts onto newer CPUs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Arpaci-Dusseau:2020:ISS, author = "Remzi H. Arpaci-Dusseau and Yuanyuan (YY) Zhou", title = "Introduction to the Special Section on {SOSP 2019}", journal = j-TOS, volume = "16", number = "2", pages = "8:1--8:1", month = jun, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3395778", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun Jun 14 08:20:04 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3395778", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Aghayev:2020:CCS, author = "Abutalib Aghayev and Sage Weil and Michael Kuchnik and Mark Nelson and Gregory R. Ganger and George Amvrosiadis", title = "The Case for Custom Storage Backends in Distributed Storage Systems", journal = j-TOS, volume = "16", number = "2", pages = "9:1--9:31", month = jun, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3386362", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun Jun 14 08:20:04 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3386362", abstract = "For a decade, the Ceph distributed file system followed the conventional wisdom of building its storage backend on top of local file systems. 
This is a preferred choice for most distributed file systems today, because it allows them to benefit from the convenience and maturity of battle-tested code. Ceph's experience, however, shows that this comes at a high price. First, developing a zero-overhead transaction mechanism is challenging. Second, metadata performance at the local level can significantly affect performance at the distributed level. Third, supporting emerging storage hardware is painstakingly slow.\par Ceph addressed these issues with BlueStore, a new backend designed to run directly on raw storage devices. In only two years since its inception, BlueStore outperformed previous established backends and is adopted by 70\% of users in production. By running in user space and fully controlling the I/O stack, it has enabled space-efficient metadata and data checksums, fast overwrites of erasure-coded data, inline compression, decreased performance variability, and avoided a series of performance pitfalls of local file systems. Finally, it makes the adoption of backward-incompatible storage hardware possible, an important trait in a changing storage landscape that is learning to embrace hardware diversity.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kim:2020:FBF, author = "Seulbae Kim and Meng Xu and Sanidhya Kashyap and Jungyeon Yoon and Wen Xu and Taesoo Kim", title = "Finding Bugs in File Systems with an Extensible Fuzzing Framework", journal = j-TOS, volume = "16", number = "2", pages = "10:1--10:35", month = jun, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3391202", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun Jun 14 08:20:04 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3391202", abstract = "File systems are too large to be bug free. 
Although handwritten test suites have been widely used to stress file systems, they can hardly keep up with the rapid increase in file system size and complexity, leading to new bugs being introduced. These bugs come in various flavors: buffer overflows to complicated semantic bugs. Although bug-specific checkers exist, they generally lack a way to explore file system states thoroughly. More importantly, no turnkey solution exists that unifies the checking effort of various aspects of a file system under one umbrella.\par In this article, to highlight the potential of applying fuzzing to find any type of file system bugs in a generic way, we propose Hydra, an extensible fuzzing framework. Hydra provides building blocks for file system fuzzing, including input mutators, feedback engines, test executors, and bug post-processors. As a result, developers only need to focus on building the core logic for finding bugs of their interests. We showcase the effectiveness of Hydra with four checkers that hunt crash inconsistency, POSIX violations, logic assertion failures, and memory errors. So far, Hydra has discovered 157 new bugs in Linux file systems, including three in verified file systems (FSCQ and Yxv6).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kolosov:2020:FTL, author = "Oleg Kolosov and Gala Yadgar and Matan Liram and Itzhak Tamo and Alexander Barg", title = "On Fault Tolerance, Locality, and Optimality in Locally Repairable Codes", journal = j-TOS, volume = "16", number = "2", pages = "11:1--11:32", month = jun, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3381832", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun Jun 14 08:20:04 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3381832", abstract = "Erasure codes in large-scale storage systems allow recovery of data from a failed node. A recently developed class of codes, locally repairable codes (LRCs), offers tradeoffs between storage overhead and repair cost. LRCs facilitate efficient recovery scenarios by adding parity blocks to the system. However, these additional blocks may eventually increase the number of blocks that must be reconstructed. Existing LRCs differ in their use of the parity blocks, in their locality semantics, and in their parameter space. Thus, existing theoretical models cannot directly compare different LRCs to determine which code offers the best recovery performance, and at what cost. We perform the first systematic comparison of existing LRC approaches. We analyze Xorbas, Azure's LRCs, and Optimal-LRCs in light of two new metrics: average degraded read cost and normalized repair cost. We show the tradeoff between these costs and the code's fault tolerance, and that different approaches offer different choices in this tradeoff. Our experimental evaluation on a Ceph cluster further demonstrates the different effects of realistic system bottlenecks on the benefit from each LRC approach. 
Despite these differences, the normalized repair cost metric can reliably identify the LRC approach that would achieve the lowest repair cost in each setup.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Yang:2020:SEF, author = "Fan Yang and Youmin Chen and Haiyu Mao and Youyou Lu and Jiwu Shu", title = "{ShieldNVM}: an Efficient and Fast Recoverable System for Secure Non-Volatile Memory", journal = j-TOS, volume = "16", number = "2", pages = "12:1--12:31", month = jun, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3381835", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun Jun 14 08:20:04 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3381835", abstract = "Data encryption and authentication are essential for secure non-volatile memory (NVM). However, the introduced security metadata needs to be atomically written back to NVM along with data, so as to provide crash consistency, which unfortunately incurs high overhead. To support fine-grained data protection and fast recovery for a secure NVM system without compromising the performance, we propose ShieldNVM. It first proposes an epoch-based mechanism to aggressively cache the security metadata in the metadata cache while retaining the consistency of them in NVM. Deferred spreading is also introduced to reduce the calculating overhead for data authentication. Leveraging the ability of data hash message authentication codes, we can always recover the consistent but old security metadata to its newest version. By recording a limited number of dirty addresses of the security metadata, ShieldNVM achieves fast recovering the secure NVM system after crashes. 
Compared to Osiris, a state-of-the-art secure NVM, ShieldNVM reduces system runtime by 39.1\% and hash message authentication code computation overhead by 80.5\% on average over NVM workloads. When system crashes happen, ShieldNVM's recovery time is orders of magnitude faster than Osiris. In addition, ShieldNVM also recovers faster than AGIT, which is the Osiris-based state-of-the-art mechanism addressing the recovery time of the secure NVM system. Once the recovery process fails, instead of dropping all data due to malicious attacks, ShieldNVM is able to detect and locate the area of the tampered data with the help of the tracked addresses.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Matsuzawa:2020:PQF, author = "Keiichi Matsuzawa and Mitsuo Hayasaka and Takahiro Shinagawa", title = "Practical Quick File Server Migration", journal = j-TOS, volume = "16", number = "2", pages = "13:1--13:30", month = jun, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3377322", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun Jun 14 08:20:04 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3377322", abstract = "Regular file server upgrades are indispensable to improve performance, robustness, and power consumption. In upgrading file servers, it is crucial to quickly migrate file-sharing services between heterogeneous servers with little downtime while minimizing performance interference. We present a practical quick file server migration scheme based on the postcopy approach that defers file copy until after switching servers. This scheme can (1) reduce downtime with on-demand file migration, (2) avoid performance interference using background migration, and (3) support heterogeneous servers with stub-based file management. 
We discuss several practical issues, such as intermittent crawling and traversal strategy, and present the solutions in our scheme. We also address several protocol-specific issues to achieve a smooth migration. This scheme is good enough to be adopted in production systems, as it has been demonstrated for several years in real operational environments. The performance evaluation demonstrates that the downtime is less than 3 seconds, and the first file access after switching servers does not cause a timeout in the default timeout settings; it takes less than 10 seconds in most cases and up to 84.55 seconds even in a large directory tree with a depth of 16 and a width of 1,000. Although the total migration time is approximately 3 times longer than the traditional precopy approach that copies all files in advance, our scheme allows the clients to keep accessing files with acceptable overhead. We also show that appropriate selection of traversal strategy reduces tail latency by 88\%, and the overhead after the migration is negligible.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Jia:2020:SED, author = "Yichen Jia and Zili Shao and Feng Chen", title = "{SlimCache}: an Efficient Data Compression Scheme for Flash-based Key-value Caching", journal = j-TOS, volume = "16", number = "2", pages = "14:1--14:34", month = jun, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3383124", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sun Jun 14 08:20:04 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3383124", abstract = "Flash-based key-value caching is becoming popular in data centers for providing high-speed key-value services. 
These systems adopt slab-based space management on flash and provide a low-cost solution for key-value caching. However, optimizing cache efficiency for flash-based key-value cache systems is highly challenging, due to the huge number of key-value items and the unique technical constraints of flash devices. In this article, we present a dynamic on-line compression scheme, called SlimCache, to improve the cache hit ratio by virtually expanding the usable cache space through data compression. We have investigated the effect of compression granularity to achieve a balance between compression ratio and speed, and we leveraged the unique workload characteristics in key-value systems to efficiently identify and separate hot and cold data. To dynamically adapt to workload changes during runtime, we have designed an adaptive hot/cold area partitioning method based on a cost model. To avoid unnecessary compression, SlimCache also estimates data compressibility to determine whether the data are suitable for compression or not. We have implemented a prototype based on Twitter's Fatcache. Our experimental results show that SlimCache can accommodate more key-value items in flash by up to 223.4\%, effectively increasing throughput and reducing average latency by up to 380.1\% and 80.7\%, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kwak:2020:COR, author = "Jaewook Kwak and Sangjin Lee and Kibin Park and Jinwoo Jeong and Yong Ho Song", title = "{Cosmos+ OpenSSD}: Rapid Prototype for Flash Storage Systems", journal = j-TOS, volume = "16", number = "3", pages = "15:1--15:35", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3385073", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Aug 15 07:00:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3385073", abstract = "As semiconductor technology has advanced, many storage systems have begun to use non-volatile memories as storage media. The organization and architecture of storage controllers have become more complex to meet various design requirements in terms of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Sun:2020:SEF, author = "Kuei Sun and Daniel Fryer and Russell Wang and Sagar Patel and Joseph Chu and Matthew Lakier and Angela Demke Brown and Ashvin Goel", title = "{Spiffy}: Enabling File-System Aware Storage Applications", journal = j-TOS, volume = "16", number = "3", pages = "16:1--16:39", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3386368", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Aug 15 07:00:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3386368", abstract = "Many file-system applications such as defragmentation tools, file-system checkers, or data recovery tools, operate at the storage layer. 
Today, developers of these file-system aware storage applications require detailed knowledge of the file-system \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "16", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Cha:2020:BTB, author = "Hokeun Cha and Moohyeon Nam and Kibeom Jin and Jiwon Seo and Beomseok Nam", title = "{B$^3$-Tree}: Byte-Addressable Binary {B}-Tree for Persistent Memory", journal = j-TOS, volume = "16", number = "3", pages = "17:1--17:27", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3394025", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Aug 15 07:00:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3394025", abstract = "In this work, we propose B$^3$-tree, a hybrid index for persistent memory that leverages the byte-addressability of the in-memory index and the page locality of B-trees. As in the byte-addressable in-memory index, B$^3$-tree is updated by 8-byte store \ldots{}
Storage", articleno = "17", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wang:2020:CWY, author = "Hua Wang and Jiawei Zhang and Ping Huang and Xinbo Yi and Bin Cheng and Ke Zhou", title = "Cache What You Need to Cache: Reducing Write Traffic in Cloud Cache via {``One-Time-Access-Exclusion''} Policy", journal = j-TOS, volume = "16", number = "3", pages = "18:1--18:24", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3397766", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Aug 15 07:00:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3397766", abstract = "The SSD has been playing a significantly important role in caching systems due to its high performance-to-cost ratio. Since the cache space is typically much smaller than that of the backend storage by one order of magnitude or even more, write density \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "18", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Yang:2020:BFO, author = "Yang Yang and Qiang Cao and Jie Yao and Hong Jiang and Li Yang", title = "Batch-file Operations to Optimize Massive Files Accessing: Analysis, Design, and Application", journal = j-TOS, volume = "16", number = "3", pages = "19:1--19:25", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3394286", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Sat Aug 15 07:00:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3394286", abstract = "Existing local file systems, designed to support a typical single-file access mode only, can lead to poor performance when accessing a batch of files, especially small files. 
This single-file mode essentially serializes accesses to batched files one by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "19", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kim:2020:ISS, author = "Jin-Soo Kim and Yang Seok Ki and Erik Riedel", title = "Introduction to the Special Section on Computational Storage", journal = j-TOS, volume = "16", number = "4", pages = "20:1--20:1", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3425305", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3425305", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "20", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Do:2020:CEE, author = "Jaeyoung Do and Victor C. Ferreira and Hossein Bobarshad and Mahdi Torabzadehkashi and Siavash Rezaei and Ali Heydarigorji and Diego Souza and Brunno F. Goldstein and Leandro Santiago and Min Soo Kim and Priscila M. V. Lima and Felipe M. G. Fran{\c{c}}a and Vladimir Alves", title = "Cost-effective, Energy-efficient, and Scalable Storage Computing for Large-scale {AI} Applications", journal = j-TOS, volume = "16", number = "4", pages = "21:1--21:37", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3415580", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3415580", abstract = "The growing volume of data produced continuously in the Cloud and at the Edge poses significant challenges for large-scale AI applications to extract and learn useful information from the data in a timely and efficient way. 
The goal of this article is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "21", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kougkas:2020:BSS, author = "Anthony Kougkas and Hariharan Devarajan and Xian-He Sun", title = "Bridging Storage Semantics Using Data Labels and Asynchronous {I/O}", journal = j-TOS, volume = "16", number = "4", pages = "22:1--22:34", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3415579", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3415579", abstract = "In the era of data-intensive computing, large-scale applications, in both scientific and the BigData communities, demonstrate unique I/O requirements leading to a proliferation of different storage devices and software stacks, many of which have \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "22", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zheng:2020:SDR, author = "Qing Zheng and Charles D. Cranor and Ankush Jain and Gregory R. Ganger and Garth A. Gibson and George Amvrosiadis and Bradley W. 
Settlemyer and Gary Grider", title = "Streaming Data Reorganization at Scale with {DeltaFS} Indexed Massive Directories", journal = j-TOS, volume = "16", number = "4", pages = "23:1--23:31", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3415581", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3415581", abstract = "Complex storage stacks providing data compression, indexing, and analytics help leverage the massive amounts of data generated today to derive insights. It is challenging to perform this computation, however, while fully utilizing the underlying storage \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "23", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Shu:2020:TDD, author = "Jiwu Shu and Youmin Chen and Qing Wang and Bohong Zhu and Junru Li and Youyou Lu", title = "{TH-DPMS}: Design and Implementation of an {RDMA}-enabled {Distributed Persistent Memory Storage System}", journal = j-TOS, volume = "16", number = "4", pages = "24:1--24:31", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3412852", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3412852", abstract = "The rapidly increasing data in recent years requires the datacenter infrastructure to store and process data with extremely high throughput and low latency. Fortunately, persistent memory (PM) and RDMA technologies bring new opportunities towards this \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "24", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Ji:2020:ICA, author = "Cheng Ji and Riwei Pan and Li-Pin Chang and Liang Shi and Zongwei Zhu and Yu Liang and Tei-Wei Kuo and Chun Jason Xue", title = "Inspection and Characterization of App File Usage in Mobile Devices", journal = j-TOS, volume = "16", number = "4", pages = "25:1--25:25", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3404119", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3404119", abstract = "While the computing power of mobile devices has been quickly evolving in recent years, the growth of mobile storage capacity is, however, relatively slower. A common problem shared by budget-phone users is that they frequently run out of storage space. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "25", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Ye:2020:HCF, author = "Liuqing Ye and Dan Feng and Yuchong Hu and Xueliang Wei", title = "Hybrid Codes: Flexible Erasure Codes with Optimized Recovery Performance", journal = j-TOS, volume = "16", number = "4", pages = "26:1--26:26", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3407193", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3407193", abstract = "Erasure codes are being extensively deployed in practical storage systems to prevent data loss with low redundancy. However, these codes require excessive disk I/Os and network traffic for recovering unavailable data. 
Among all erasure codes, Minimum \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "26", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Noh:2021:TTA, author = "Sam H. Noh", title = "Thanking the {TOS Associated Editors and Reviewers}", journal = j-TOS, volume = "17", number = "1", pages = "1:1--1:2", month = feb, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442683", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3442683", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Noh:2021:ISS, author = "Sam H. Noh and Brent Welch", title = "Introduction to the Special Section on {USENIX FAST 2020}", journal = j-TOS, volume = "17", number = "1", pages = "2:1--2:2", month = feb, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442685", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3442685", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Maneas:2021:RSE, author = "Stathis Maneas and Kaveh Mahdaviani and Tim Emami and Bianca Schroeder", title = "Reliability of {SSDs} in Enterprise Storage Systems: a Large-Scale Field Study", journal = j-TOS, volume = "17", number = "1", pages = "3:1--3:27", month = feb, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3423088", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3423088", abstract = "This article presents the first large-scale field study of NAND-based SSDs in enterprise storage systems (in contrast to drives in distributed data center storage systems). The study is based on a very comprehensive set of field data, covering 1.6 \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Ganesan:2021:SEC, author = "Aishwarya Ganesan and Ramnatthan Alagappan and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau", title = "Strong and Efficient Consistency with Consistency-aware Durability", journal = j-TOS, volume = "17", number = "1", pages = "4:1--4:27", month = feb, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3423138", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3423138", abstract = "We introduce consistency-aware durability or Cad, a new approach to durability in distributed storage that enables strong consistency while delivering high performance. 
We demonstrate the efficacy of this approach by designing cross-client monotonic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhan:2021:CAW, author = "Yang Zhan and Alex Conway and Yizheng Jiao and Nirjhar Mukherjee and Ian Groombridge and Michael A. Bender and Martin Farach-Colton and William Jannen and Rob Johnson and Donald E. Porter and Jun Yuan", title = "Copy-on-Abundant-Write for Nimble File System Clones", journal = j-TOS, volume = "17", number = "1", pages = "5:1--5:27", month = feb, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3423495", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "https://dl.acm.org/doi/10.1145/3423495", abstract = "Making logical copies, or clones, of files and directories is critical to many real-world applications and workflows, including backups, virtual machines, and containers. An ideal clone implementation meets the following performance goals: (1) creating \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Cheng:2021:NOH, author = "Wen Cheng and Chunyan Li and Lingfang Zeng and Yingjin Qian and Xi Li and Andr{\'e} Brinkmann", title = "{NVMM}-Oriented Hierarchical Persistent Client Caching for {Lustre}", journal = j-TOS, volume = "17", number = "1", pages = "6:1--6:22", month = feb, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3404190", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3404190", abstract = "In high-performance computing (HPC), data and metadata are stored on special server nodes and client applications access the servers' data and metadata through a network, which induces network latencies and resource contention. These server nodes are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Papagiannis:2021:KEM, author = "Anastasios Papagiannis and Giorgos Saloustros and Giorgos Xanthakis and Giorgos Kalaentzis and Pilar Gonzalez-Ferez and Angelos Bilas", title = "{Kreon}: an Efficient Memory-Mapped Key-Value Store for Flash Storage", journal = j-TOS, volume = "17", number = "1", pages = "7:1--7:32", month = feb, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3418414", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3418414", abstract = "Persistent key-value stores have emerged as a main component in the data access path of modern data processing systems. However, they exhibit high CPU and I/O overhead. 
Nowadays, due to power limitations, it is important to reduce CPU overheads for data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Yadgar:2021:SBW, author = "Gala Yadgar and Moshe Gabel and Shehbaz Jaffer and Bianca Schroeder", title = "{SSD}-based Workload Characteristics and Their Performance Implications", journal = j-TOS, volume = "17", number = "1", pages = "8:1--8:26", month = feb, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3423137", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 5 11:10:27 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3423137", abstract = "Storage systems are designed and optimized relying on wisdom derived from analysis studies of file-system and block-level workloads. However, while SSDs are becoming a dominant building block in many storage systems, their design continues to build on \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Gavrilovska:2021:ISI, author = "Ada Gavrilovska and Erez Zadok", title = "Introduction to the Special Issue on {USENIX ATC 2020}", journal = j-TOS, volume = "17", number = "2", pages = "9:1--9:2", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3457170", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Jun 16 08:47:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3457170", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Im:2021:DLT, author = "Junsu Im and Jinwook Bae and Chanwoo Chung and Arvind and Sungjin Lee", title = "Design of {LSM}-tree-based Key-value {SSDs} with Bounded Tails", journal = j-TOS, volume = "17", number = "2", pages = "10:1--10:27", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3452846", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Jun 16 08:47:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3452846", abstract = "Key-value store based on a log-structured merge-tree (LSM-tree) is preferable to hash-based key-value store, because an LSM-tree can support a wider variety of operations and show better performance, especially for writes. However, LSM-tree is difficult \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Bittman:2021:TDC, author = "Daniel Bittman and Peter Alvaro and Pankaj Mehra and Darrell D. E. Long and Ethan L. Miller", title = "{Twizzler}: a Data-centric {OS} for Non-volatile Memory", journal = j-TOS, volume = "17", number = "2", pages = "11:1--11:31", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3454129", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Jun 16 08:47:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3454129", abstract = "Byte-addressable, non-volatile memory (NVM) presents an opportunity to rethink the entire system stack. We present Twizzler, an operating system redesign for this near-future. Twizzler removes the kernel from the I/O path, provides programs with memory-
\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Rebello:2021:CAR, author = "Anthony Rebello and Yuvraj Patel and Ramnatthan Alagappan and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau", title = "Can Applications Recover from fsync Failures?", journal = j-TOS, volume = "17", number = "2", pages = "12:1--12:30", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450338", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Jun 16 08:47:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3450338", abstract = "We analyze how file systems and modern data-intensive applications react to fsync failures. First, we characterize how three Linux file systems (ext4, XFS, Btrfs) behave in the presence of failures. We find commonalities across file systems (pages are always marked clean, certain block writes always lead to unavailability) as well as differences (page content and failure reporting is varied). Next, we study how five widely used applications (PostgreSQL, LMDB, LevelDB, SQLite, Redis) handle fsync failures. Our findings show that although applications use many failure-handling strategies, none are sufficient: fsync failures can cause catastrophic outcomes such as data loss and corruption. Our findings have strong implications for the design of file systems and applications that intend to provide strong durability guarantees.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Li:2021:RPE, author = "Xiaolu Li and Zuoru Yang and Jinhong Li and Runhui Li and Patrick P. C. 
Lee and Qun Huang and Yuchong Hu", title = "Repair Pipelining for Erasure-coded Storage: Algorithms and Evaluation", journal = j-TOS, volume = "17", number = "2", pages = "13:1--13:29", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3436890", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Jun 16 08:47:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3436890", abstract = "We propose repair pipelining, a technique that speeds up the repair performance in general erasure-coded storage. By carefully scheduling the repair of failed data in small-size units across storage nodes in a pipelined manner, repair pipelining reduces \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kim:2021:PMP, author = "Joonsung Kim and Kanghyun Choi and Wonsik Lee and Jangwoo Kim", title = "Performance Modeling and Practical Use Cases for Black-Box {SSDs}", journal = j-TOS, volume = "17", number = "2", pages = "14:1--14:38", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3440022", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Jun 16 08:47:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3440022", abstract = "Modern servers are actively deploying Solid-State Drives (SSDs) thanks to their high throughput and low latency. However, current server architects cannot achieve the full performance potential of commodity SSDs, as SSDs are complex devices designed for specific goals (e.g., latency, throughput, endurance, cost) with their internal mechanisms undisclosed to users. 
In this article, we propose SSDcheck, a novel SSD performance model to extract various internal mechanisms and predict the latency of next access to commodity black-box SSDs. We identify key performance-critical features (e.g., garbage collection, write buffering) and find their parameters (i.e., size, threshold) from each SSD by using our novel diagnosis code snippets. Then, SSDcheck constructs a performance model for a target SSD and dynamically manages the model to predict the latency of the next access. In addition, SSDcheck extracts and provides other useful internal mechanisms (e.g., fetch unit in multi-queue SSDs, background tasks triggering idle-time interval) for the storage system to fully exploit SSDs. By using those useful features and the performance model, we propose multiple practical use cases. Our evaluations show that SSDcheck's performance model is highly accurate, and proposed use cases achieve significant performance improvement in various scenarios.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Pan:2021:PLA, author = "Cheng Pan and Xiaolin Wang and Yingwei Luo and Zhenlin Wang", title = "Penalty- and Locality-aware Memory Allocation in {Redis} Using Enhanced {AET}", journal = j-TOS, volume = "17", number = "2", pages = "15:1--15:45", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447573", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Jun 16 08:47:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3447573", abstract = "Due to large data volume and low latency requirements of modern web services, the use of an in-memory key-value (KV) cache often becomes an inevitable choice (e.g., Redis and Memcached). 
The in-memory cache holds hot data, reduces request latency, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Lu:2021:ISS, author = "Shan Lu and Jon Howell", title = "Introduction to the Special Section on {USENIX OSDI 2020}", journal = j-TOS, volume = "17", number = "3", pages = "16:1--16:1", month = aug, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3479434", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 15 05:45:21 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3479434", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "16", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Yang:2021:LSA, author = "Juncheng Yang and Yao Yue and K. V. Rashmi", title = "A Large-scale Analysis of Hundreds of In-memory Key-value Cache Clusters at {Twitter}", journal = j-TOS, volume = "17", number = "3", pages = "17:1--17:35", month = aug, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3468521", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 15 05:45:21 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3468521", abstract = "Modern web services use in-memory caching extensively to increase throughput and reduce latency. There have been several workload analyses of production systems that have fueled research in improving the effectiveness of in-memory caching systems. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Storage", articleno = "17", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wei:2021:XFR, author = "Xingda Wei and Rong Chen and Haibo Chen and Binyu Zang", title = "{XStore}: Fast {RDMA}-Based Ordered Key--Value Store Using Remote Learned Cache", journal = j-TOS, volume = "17", number = "3", pages = "18:1--18:32", month = aug, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3468520", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 15 05:45:21 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3468520", abstract = "RDMA (Remote Direct Memory Access) has gained considerable interests in network-attached in-memory key-value stores. However, traversing the remote tree-based index in ordered key-value stores with RDMA becomes a critical obstacle, causing an order-of- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "18", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhu:2021:ORE, author = "Bohong Zhu and Youmin Chen and Qing Wang and Youyou Lu and Jiwu Shu", title = "{Octopus +}: an {RDMA}-Enabled Distributed Persistent Memory File System", journal = j-TOS, volume = "17", number = "3", pages = "19:1--19:25", month = aug, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3448418", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 15 05:45:21 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3448418", abstract = "Non-volatile memory and remote direct memory access (RDMA) provide extremely high performance in storage and network hardware.
However, existing distributed file systems strictly isolate file system and network layers, and the heavy layered software \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "19", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhang:2021:TVM, author = "Jiachen Zhang and Lixiao Cui and Peng Li and Xiaoguang Liu and Gang Wang", title = "Toward Virtual Machine Image Management for Persistent Memory", journal = j-TOS, volume = "17", number = "3", pages = "20:1--20:24", month = aug, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450976", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 15 05:45:21 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3450976", abstract = "Persistent memory's (PM) byte-addressability and high capacity will also make it emerging for virtualized environment. Modern virtual machine monitors virtualize PM using either I/O virtualization or memory virtualization. However, I/O virtualization will \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "20", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Hong:2021:RFR, author = "Duwon Hong and Keonsoo Ha and Minseok Ko and Myoungjun Chun and Yoona Kim and Sungjin Lee and Jihong Kim", title = "{Reparo}: a Fast {RAID} Recovery Scheme for Ultra-large {SSDs}", journal = j-TOS, volume = "17", number = "3", pages = "21:1--21:24", month = aug, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450977", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 15 05:45:21 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3450977", abstract = "A recent ultra-large SSD (e.g., a 32-TB SSD) provides many benefits in building cost-efficient enterprise storage systems. Owing to its large capacity, however, when such SSDs fail in a RAID storage system, a long rebuild overhead is inevitable for RAID \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "21", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Chikhaoui:2021:MOO, author = "Amina Chikhaoui and Laurent Lemarchand and Kamel Boukhalfa and Jalil Boukhobza", title = "Multi-objective Optimization of Data Placement in a Storage-as-a-Service Federated Cloud", journal = j-TOS, volume = "17", number = "3", pages = "22:1--22:32", month = aug, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3452741", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 15 05:45:21 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3452741", abstract = "Cloud federation enables service providers to collaborate to provide better services to customers. For cloud storage services, optimizing customer object placement for a member of a federation is a real challenge. 
Storage, migration, and latency costs \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "22", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhang:2021:NPM, author = "Baoquan Zhang and David H. C. Du", title = "{NVLSM}: a Persistent Memory Key--Value Store Using Log-Structured Merge Tree with Accumulative Compaction", journal = j-TOS, volume = "17", number = "3", pages = "23:1--23:26", month = aug, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3453300", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 15 05:45:21 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3453300", abstract = "Computer systems utilizing byte-addressable Non-Volatile Memory (NVM) as memory/storage can provide low-latency data persistence. The widely used key-value stores using Log-Structured Merge Tree (LSM-Tree) are still beneficial for NVM systems in aspects \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "23", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Nachman:2021:GOS, author = "Aviv Nachman and Sarai Sheinvald and Ariel Kolikant and Gala Yadgar", title = "{GoSeed}: Optimal Seeding Plan for Deduplicated Storage", journal = j-TOS, volume = "17", number = "3", pages = "24:1--24:28", month = aug, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3453301", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 15 05:45:21 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3453301", abstract = "Deduplication decreases the physical occupancy of files in a storage volume by removing duplicate copies of data chunks, but creates data-sharing dependencies that complicate standard storage management tasks. Specifically, data migration plans must \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "24", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Aguilera:2021:ISS, author = "Marcos K. Aguilera and Gala Yadgar", title = "Introduction to the Special Section on {USENIX FAST 2021}", journal = j-TOS, volume = "17", number = "4", pages = "25:1--25:1", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3485449", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Nov 3 09:56:08 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3485449", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "25", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Dong:2021:RED, author = "Siying Dong and Andrew Kryczka and Yanqin Jin and Michael Stumm", title = "{RocksDB}: Evolution of Development Priorities in a Key--value Store Serving Large-scale Applications", journal = j-TOS, volume = "17", number = "4", pages = "26:1--26:32", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3483840", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Nov 3 09:56:08 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3483840", abstract = "This article is an eight-year retrospective on development priorities for RocksDB, a key-value store developed at Facebook that targets large-scale distributed systems and that is optimized for Solid State Drives (SSDs). We describe how the priorities \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "26", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Li:2021:LNS, author = "Cheng Li and Hao Chen and Chaoyi Ruan and Xiaosong Ma and Yinlong Xu", title = "Leveraging {NVMe SSDs} for Building a Fast, Cost-effective, {LSM}-tree-based {KV} Store", journal = j-TOS, volume = "17", number = "4", pages = "27:1--27:29", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3480963", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Nov 3 09:56:08 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3480963", abstract = "Key-value (KV) stores support many crucial applications and services. They perform fast in-memory processing but are still often limited by I/O performance. 
The recent emergence of high-speed commodity non-volatile memory express solid-state drives (NVMe \ldots{})", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "27", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Fukatani:2021:LDR, author = "Takayuki Fukatani and Hieu Hanh Le and Haruo Yokota", title = "Lightweight Dynamic Redundancy Control with Adaptive Encoding for Server-based Storage", journal = j-TOS, volume = "17", number = "4", pages = "28:1--28:38", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3456292", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Nov 3 09:56:08 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3456292", abstract = "With the recent performance improvements in commodity hardware, low-cost commodity server-based storage has become a practical alternative to dedicated-storage appliances. Because of the high failure rate of commodity servers, data redundancy across \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "28", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Ghoshal:2021:PAM, author = "Devarshi Ghoshal and Lavanya Ramakrishnan", title = "Programming Abstractions for Managing Workflows on Tiered Storage Systems", journal = j-TOS, volume = "17", number = "4", pages = "29:1--29:21", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3457119", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Nov 3 09:56:08 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3457119", abstract = "Scientific workflows in High Performance Computing (HPC) environments are processing large amounts of data. 
The storage hierarchy on HPC systems is getting deeper, driven by new technologies (NVRAMs, SSDs, etc.). There is a need for new programming \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "29", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhang:2021:IPD, author = "Datong Zhang and Yuhui Deng and Yi Zhou and Yifeng Zhu and Xiao Qin", title = "Improving the Performance of Deduplication-Based Backup Systems via Container Utilization Based Hot Fingerprint Entry Distilling", journal = j-TOS, volume = "17", number = "4", pages = "30:1--30:23", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3459626", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Nov 3 09:56:08 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3459626", abstract = "Data deduplication techniques construct an index consisting of fingerprint entries to identify and eliminate duplicated copies of repeating data. The bottleneck of disk-based index lookup and data fragmentation caused by eliminating duplicated chunks are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Storage", articleno = "30", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Song:2021:TRN, author = "Xiaojia Song and Tao Xie and Stephen Fischer", title = "Two Reconfigurable {NDP} Servers: Understanding the Impact of Near-Data Processing on Data Center Applications", journal = j-TOS, volume = "17", number = "4", pages = "31:1--31:27", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3460201", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Nov 3 09:56:08 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3460201", abstract = "Existing near-data processing (NDP)-powered architectures have demonstrated their strength for some data-intensive applications. Data center servers, however, have to serve not only data-intensive but also compute-intensive applications. An in-depth \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "31", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wu:2021:FAM, author = "Fenggang Wu and Bingzhe Li and David H. C. Du", title = "{FluidSMR}: Adaptive Management for Hybrid {SMR} Drives", journal = j-TOS, volume = "17", number = "4", pages = "32:1--32:30", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3465404", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Nov 3 09:56:08 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3465404", abstract = "Hybrid Shingled Magnetic Recording (H-SMR) drives are the most recently developed SMR drives, which allow dynamic conversion of the recording format between Conventional Magnetic Recording (CMR) and SMR on a single disk drive. We identify the unique \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "32", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Brown:2022:ISS, author = "Angela Demke Brown and Jay Lorch", title = "Introduction to the Special Section on {USENIX OSDI 2021}", journal = j-TOS, volume = "18", number = "1", pages = "1:1--1:1", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3507950", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 4 08:42:28 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3507950", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wang:2022:PNP, author = "Qing Wang and Youyou Lu and Junru Li and Minhui Xie and Jiwu Shu", title = "\pkg{Nap}: Persistent Memory Indexes for {NUMA} Architectures", journal = j-TOS, volume = "18", number = "1", pages = "2:1--2:35", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3507922", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 4 08:42:28 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3507922", abstract = "We present Nap, a black-box approach that converts concurrent persistent memory (PM) indexes into non-uniform memory access (NUMA)-aware counterparts. Based on the observation that real-world workloads always feature skewed access patterns, Nap introduces \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Tai:2022:OSP, author = "Amy Tai and Igor Smolyar and Michael Wei and Dan Tsafrir", title = "Optimizing Storage Performance with Calibrated Interrupts", journal = j-TOS, volume = "18", number = "1", pages = "3:1--3:32", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3505139", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 4 08:42:28 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3505139", abstract = "After request completion, an I/O device must decide whether to minimize latency by immediately firing an interrupt or to optimize for throughput by delaying the interrupt, anticipating that more requests will complete soon and help amortize the interrupt \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Macko:2022:SDF, author = "Peter Macko and Jason Hennessey", title = "Survey of Distributed File System Design Choices", journal = j-TOS, volume = "18", number = "1", pages = "4:1--4:34", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3465405", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 4 08:42:28 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3465405", abstract = "Decades of research on distributed file systems and storage systems exists. New researchers and engineers have a lot of literature to study, but only a comparatively small number of high-level design choices are available when creating a distributed file system. And within each aspect of the system, typically several common approaches are used. 
So, rather than surveying distributed file systems, this article presents a survey of important design decisions and, within those decisions, the most commonly used options. It also presents a qualitative exploration of their tradeoffs. We include several relatively recent designs and their variations that illustrate other tradeoff choices in the design space, despite being underexplored. In doing so, we provide a primer on distributed file systems, and we also show areas that are overexplored and underexplored, in the hopes of inspiring new research.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Litz:2022:PRP, author = "Heiner Litz and Javier Gonzalez and Ana Klimovic and Christos Kozyrakis", title = "\pkg{RAIL}: Predictable, Low Tail Latency for {NVMe} Flash", journal = j-TOS, volume = "18", number = "1", pages = "5:1--5:21", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3465406", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 4 08:42:28 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3465406", abstract = "Flash-based storage is replacing disk for an increasing number of data center applications, providing orders of magnitude higher throughput and lower average latency. However, applications also require predictable storage latency. Existing Flash devices \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wang:2022:EEB, author = "Shucheng Wang and Ziyi Lu and Qiang Cao and Hong Jiang and Jie Yao and Yuanyuan Dong and Puyuan Yang and Changsheng Xie", title = "Exploration and Exploitation for Buffer-Controlled {HDD}-Writes for {SSD--HDD} Hybrid Storage Server", journal = j-TOS, volume = "18", number = "1", pages = "6:1--6:29", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3465410", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 4 08:42:28 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3465410", abstract = "Hybrid storage servers combining solid-state drives (SSDs) and hard-drive disks (HDDs) provide cost-effectiveness and $ \mu $ s-level responsiveness for applications. However, observations from cloud storage system Pangu manifest that HDDs are often \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Li:2022:PBP, author = "Jun Li and Xiaofei Xu and Zhigang Cai and Jianwei Liao and Kenli Li and Balazs Gerofi and Yutaka Ishikawa", title = "Pattern-Based Prefetching with Adaptive Cache Management Inside of Solid-State Drives", journal = j-TOS, volume = "18", number = "1", pages = "7:1--7:25", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3474393", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 4 08:42:28 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3474393", abstract = "This article proposes a pattern-based prefetching scheme with the support of adaptive cache management, at the flash translation layer of solid-state drives (SSDs). 
It works inside of SSDs and has features of OS dependence and uses transparency. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wang:2022:CLI, author = "Zhaoguo Wang and Haibo Chen and Youyun Wang and Chuzhe Tang and Huan Wang", title = "The Concurrent Learned Indexes for Multicore Data Storage", journal = j-TOS, volume = "18", number = "1", pages = "8:1--8:35", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3478289", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 4 08:42:28 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3478289", abstract = "We present XIndex, which is a concurrent index library and designed for fast queries. It includes a concurrent ordered index (XIndex-R) and a concurrent hash index (XIndex-H). Similar to a recent proposal of the learned index, the indexes in XIndex use \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Gao:2022:RTF, author = "Congming Gao and Min Ye and Chun Jason Xue and Youtao Zhang and Liang Shi and Jiwu Shu and Jun Yang", title = "Reprogramming {$3$D} {TLC} Flash Memory based Solid State Drives", journal = j-TOS, volume = "18", number = "1", pages = "9:1--9:33", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3487064", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Mar 4 08:42:28 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3487064", abstract = "NAND flash memory-based SSDs have been widely adopted. The scaling of SSD has evolved from planar (2D) to 3D stacking.
For reliability and other reasons, the technology node in 3D NAND SSD is larger than in 2D, but data density can be increased via \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Calciu:2022:ISS, author = "Irina Calciu and Geoff Kuenning", title = "Introduction to the Special Section on {USENIX ATC 2021}", journal = j-TOS, volume = "18", number = "2", pages = "10:1--10:2", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3519550", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon May 9 06:54:11 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3519550", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zuo:2022:ROS, author = "Pengfei Zuo and Qihui Zhou and Jiazhao Sun and Liu Yang and Shuangwu Zhang and Yu Hua and James Cheng and Rongfeng He and Huabing Yan", title = "{RACE}: One-sided {RDMA}-conscious Extendible Hashing", journal = j-TOS, volume = "18", number = "2", pages = "11:1--11:29", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3511895", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon May 9 06:54:11 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3511895", abstract = "Memory disaggregation is a promising technique in datacenters with the benefit of improving resource utilization, failure isolation, and elasticity. Hashing indexes have been widely used to provide fast lookup services in distributed memory systems. 
However, traditional hashing indexes become inefficient for disaggregated memory, since the computing power in the memory pool is too weak to execute complex index requests. To provide efficient indexing services in disaggregated memory scenarios, this article proposes RACE hashing, a one-sided RDMA-Conscious Extendible hashing index with lock-free remote concurrency control and efficient remote resizing. RACE hashing enables all index operations to be efficiently executed by using only one-sided RDMA verbs without involving any compute resource in the memory pool. To support remote concurrent access with high performance, RACE hashing leverages a lock-free remote concurrency control scheme to enable different clients to concurrently operate the same hashing index in the memory pool in a lock-free manner. To resize the hash table with low overheads, RACE hashing leverages an extendible remote resizing scheme to reduce extra RDMA accesses caused by extendible resizing and allow concurrent request execution during resizing. Extensive experimental results demonstrate that RACE hashing outperforms state-of-the-art distributed in-memory hashing indexes by 1.4--13.7$ \times $ in YCSB hybrid workloads", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kwon:2022:SFF, author = "Dongup Kwon and Wonsik Lee and Dongryeong Kim and Junehyuk Boo and Jangwoo Kim", title = "{SmartFVM}: a Fast, Flexible, and Scalable Hardware-based Virtualization for Commodity Storage Devices", journal = j-TOS, volume = "18", number = "2", pages = "12:1--12:27", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3511213", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon May 9 06:54:11 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "https://dl.acm.org/doi/10.1145/3511213", abstract = "A computational storage device incorporating a computation unit inside or near its storage unit is a highly promising technology to maximize a storage server's performance. However, to apply such computational storage devices and take their full potential in virtualized environments, server architects must resolve a fundamental challenge: cost-effective virtualization. This critical challenge can be directly addressed by the following questions: (1) how to virtualize two different hardware units (i.e., computation and storage), and (2) how to integrate them to construct virtual computational storage devices, and (3) how to provide them to users. However, the existing methods for computational storage virtualization severely suffer from their low performance and high costs due to the lack of hardware-assisted virtualization support.\par In this work, we propose SmartFVM-Engine, an FPGA card designed to maximize the performance and cost-effectiveness of computational storage virtualization. SmartFVM-Engine introduces three key ideas to achieve the design goals. 
First, it achieves high virtualization performance by applying hardware-assisted virtualization to both computation and storage units. Second, it further improves the performance by applying hardware-assisted resource orchestration for the virtualized units. Third, it achieves high cost-effectiveness by dynamically constructing and scheduling virtual computational storage devices. To the best of our knowledge, this is the first work to implement a hardware-assisted virtualization mechanism for modern computational storage devices.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kassa:2022:POD, author = "Hiwot Tadese Kassa and Jason Akers and Mrinmoy Ghosh and Zhichao Cao and Vaibhav Gogte and Ronald Dreslinski", title = "Power-optimized Deployment of Key-value Stores Using Storage Class Memory", journal = j-TOS, volume = "18", number = "2", pages = "13:1--13:26", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3511905", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon May 9 06:54:11 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3511905", abstract = "High-performance flash-based key-value stores in data-centers utilize large amounts of DRAM to cache hot data. However, motivated by the high cost and power consumption of DRAM, server designs with lower DRAM-per-compute ratio are becoming popular. These \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Han:2022:SFR, author = "Runzhou Han and Om Rameshwar Gatla and Mai Zheng and Jinrui Cao and Di Zhang and Dong Dai and Yong Chen and Jonathan Cook", title = "A Study of Failure Recovery and Logging of High-Performance Parallel File Systems", journal = j-TOS, volume = "18", number = "2", pages = "14:1--14:44", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3483447", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon May 9 06:54:11 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3483447", abstract = "Large-scale parallel file systems (PFSs) play an essential role in high-performance computing (HPC). However, despite their importance, their reliability is much less studied or understood compared with that of local storage systems or cloud storage \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Cao:2022:HSC, author = "Zhichao Cao and Huibing Dong and Yixun Wei and Shiyong Liu and David H. C. 
Du", title = "{IS-HBase}: an In-Storage Computing Optimized {HBase} with {I/O} Offloading and Self-Adaptive Caching in Compute-Storage Disaggregated Infrastructure", journal = j-TOS, volume = "18", number = "2", pages = "15:1--15:42", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3488368", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon May 9 06:54:11 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3488368", abstract = "Active storage devices and in-storage computing are proposed and developed in recent years to effectively reduce the amount of required data traffic and to improve the overall application performance. They are especially preferred in the compute-storage \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Liu:2022:CSP, author = "Weihua Liu and Fei Wu and Xiang Chen and Meng Zhang and Yu Wang and Xiangfeng Lu and Changsheng Xie", title = "Characterization Summary of Performance, Reliability, and Threshold Voltage Distribution of {$3$D} Charge-Trap {NAND} Flash Memory", journal = j-TOS, volume = "18", number = "2", pages = "16:1--16:25", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3491230", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon May 9 06:54:11 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3491230", abstract = "Solid-state drive (SSD) gradually dominates in the high-performance storage scenarios. Three-dimension (3D) NAND flash memory owning high-storage capacity is becoming a mainstream storage component of SSD. However, the interferences of the new 3D charge \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "16", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Bhimani:2022:ASI, author = "Janki Bhimani and Zhengyu Yang and Jingpei Yang and Adnan Maruf and Ningfang Mi and Rajinikanth Pandurangan and Changho Choi and Vijay Balakrishnan", title = "Automatic Stream Identification to Improve Flash Endurance in Data Centers", journal = j-TOS, volume = "18", number = "2", pages = "17:1--17:29", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3470007", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon May 9 06:54:11 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3470007", abstract = "The demand for high performance I/O in Storage-as-a-Service (SaaS) is increasing day by day. To address this demand, NAND Flash-based Solid-state Drives (SSDs) are commonly used in data centers as cache- or top-tiers in the storage rack ascribe to their \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "17", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Ge:2022:HFS, author = "Xiongzi Ge and Zhichao Cao and David H. C. 
Du and Pradeep Ganesan and Dennis Hahn", title = "{HintStor}: a Framework to Study {I/O} Hints in Heterogeneous Storage", journal = j-TOS, volume = "18", number = "2", pages = "18:1--18:24", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3489143", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon May 9 06:54:11 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3489143", abstract = "To bridge the giant semantic gap between applications and modern storage systems, passing a piece of tiny and useful information, called I/O access hints, from upper layers to the storage layer may greatly improve application performance and ease data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "18", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Noh:2022:ISS, author = "Sam H. Noh", title = "Introduction to the Special Section on {SOSP 2021}", journal = j-TOS, volume = "18", number = "3", pages = "19:1--19:1", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3542850", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 28 10:41:23 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3542850", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "19", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Ganesan:2022:ENE, author = "Aishwarya Ganesan and Ramnatthan Alagappan and Anthony Rebello and Andrea C. Arpaci-Dusseau and Remzi H. 
Arpaci-Dusseau", title = "Exploiting Nil-external Interfaces for Fast Replicated Storage", journal = j-TOS, volume = "18", number = "3", pages = "20:1--20:35", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3542821", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 28 10:41:23 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3542821", abstract = "Do some storage interfaces enable higher performance than others? Can one identify and exploit such interfaces to realize high performance in storage systems? This article answers these questions in the affirmative by identifying nil-externality, a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "20", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{McAllister:2022:KTP, author = "Sara McAllister and Benjamin Berg and Julian Tutuncu-Macias and Juncheng Yang and Sathya Gunasekar and Jimmy Lu and Daniel S. Berger and Nathan Beckmann and Gregory R. Ganger", title = "{Kangaroo}: Theory and Practice of Caching Billions of Tiny Objects on Flash", journal = j-TOS, volume = "18", number = "3", pages = "21:1--21:33", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3542928", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 28 10:41:23 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3542928", abstract = "Many social-media and IoT services have very large working sets consisting of billions of tiny ($ \approx $ 100 B) objects. Large, flash-based caches are important to serving these working sets at acceptable monetary cost. However, caching tiny objects on flash is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "21", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Lembke:2022:DIF, author = "James Lembke and Pierre-Louis Roman and Patrick Eugster", title = "{DEFUSE}: an Interface for Fast and Correct User Space File System Access", journal = j-TOS, volume = "18", number = "3", pages = "22:1--22:29", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3494556", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 28 10:41:23 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3494556", abstract = "Traditionally, the only option for developers was to implement file systems (FSs) via drivers within the operating system kernel. However, there exists a growing number of file systems (FSs), notably distributed FSs for the cloud, whose interfaces are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "22", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhang:2022:BGF, author = "Yiwen Zhang and Ting Yao and Jiguang Wan and Changsheng Xie", title = "Building {GC}-free Key--value Store on {HM-SMR} Drives with {ZoneFS}", journal = j-TOS, volume = "18", number = "3", pages = "23:1--23:23", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3502846", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 28 10:41:23 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3502846", abstract = "Host-managed shingled magnetic recording drives (HM-SMR) are advantageous in capacity to harness the explosive growth of data. For key-value (KV) stores based on log-structured merge trees (LSM-trees), the HM-SMR drive is an ideal solution owning to its \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "23", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zheng:2022:WBD, author = "Jianwei Zheng and Zhenhua Li and Yuanhui Qiu and Hao Lin and He Xiao and Yang Li and Yunhao Liu", title = "{WebAssembly}-based Delta Sync for Cloud Storage Services", journal = j-TOS, volume = "18", number = "3", pages = "24:1--24:31", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3502847", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 28 10:41:23 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3502847", abstract = "Delta synchronization (sync) is crucial to the network-level efficiency of cloud storage services, especially when handling large files with small increments. Practical delta sync techniques are, however, only available for PC clients and mobile apps, but \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "24", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zou:2022:HDS, author = "Xiangyu Zou and Jingsong Yuan and Philip Shilane and Wen Xia and Haijun Zhang and Xuan Wang", title = "From Hyper-dimensional Structures to Linear Structures: Maintaining Deduplicated Data's Locality", journal = j-TOS, volume = "18", number = "3", pages = "25:1--25:28", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3507921", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 28 10:41:23 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3507921", abstract = "Data deduplication is widely used to reduce the size of backup workloads, but it has the known disadvantage of causing poor data locality, also referred to as the fragmentation problem. 
This results from the gap between the hyper-dimensional structure of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "25", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{May:2022:DGE, author = "Michael J. May", title = "{Donag}: Generating Efficient Patches and Diffs for Compressed Archives", journal = j-TOS, volume = "18", number = "3", pages = "26:1--26:41", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3507919", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 28 10:41:23 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3507919", abstract = "Differencing between compressed archives is a common task in file management and synchronization. Applications include source code distribution, application updates, and document synchronization. General purpose binary differencing tools can create and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "26", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Einziger:2022:LRS, author = "Gil Einziger and Ohad Eytan and Roy Friedman and Benjamin Manes", title = "Lightweight Robust Size Aware Cache Management", journal = j-TOS, volume = "18", number = "3", pages = "27:1--27:23", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3507920", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Wed Sep 28 10:41:23 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3507920", abstract = "Modern key-value stores, object stores, Internet proxy caches, and Content Delivery Networks (CDN) often manage objects of diverse sizes, e.g., blobs, video files of different lengths, images with varying resolutions, and small documents. 
In such \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "27", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Dean:2022:ISS, author = "Dean Hildebrand and Donald Porter", title = "Introduction to the Special Section on {USENIX FAST 2022}", journal = j-TOS, volume = "18", number = "4", pages = "28:1--28:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3564770", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Dec 20 07:58:36 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3564770", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "28", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Jaffer:2022:IEN, author = "Shehbaz Jaffer and Kaveh Mahdaviani and Bianca Schroeder", title = "Improving the Endurance of Next Generation {SSD}'s using {WOM-v} Codes", journal = j-TOS, volume = "18", number = "4", pages = "29:1--29:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3565027", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Dec 20 07:58:36 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3565027", abstract = "High density Solid State Drives, such as QLC drives, offer increased storage capacity, but a magnitude lower Program and Erase (P/E) cycles, limiting their endurance and hence usability. We present the design and implementation of non-binary, Voltage-. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Storage", articleno = "29", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Li:2022:CRF, author = "Ruibin Li and Xiang Ren and Xu Zhao and Siwei He and Michael Stumm and Ding Yuan", title = "{ctFS}: Replacing File Indexing with Hardware Memory Translation through Contiguous File Allocation for Persistent Memory", journal = j-TOS, volume = "18", number = "4", pages = "30:1--30:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3565026", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Dec 20 07:58:36 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3565026", abstract = "Persistent byte-addressable memory (PM) is poised to become prevalent in future computer systems. PMs are significantly faster than disk storage, and accesses to PMs are governed by the Memory Management Unit (MMU) just as accesses with volatile RAM. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "30", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kisous:2022:WMG, author = "Roei Kisous and Ariel Kolikant and Abhinav Duggal and Sarai Sheinvald and Gala Yadgar", title = "The what, The from, and The to: The Migration Games in Deduplicated Systems", journal = j-TOS, volume = "18", number = "4", pages = "31:1--31:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3565025", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Dec 20 07:58:36 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3565025", abstract = "Deduplication reduces the size of the data stored in large-scale storage systems by replacing duplicate data blocks with references to their unique copies. 
This creates dependencies between files that contain similar content and complicates the management \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "31", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Yang:2022:TED, author = "Zuoru Yang and Jingwei Li and Yanjing Ren and Patrick P. C. Lee", title = "Tunable Encrypted Deduplication with Attack-resilient Key Management", journal = j-TOS, volume = "18", number = "4", pages = "32:1--32:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510614", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Dec 20 07:58:36 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3510614", abstract = "Conventional encrypted deduplication approaches retain the deduplication capability on duplicate chunks after encryption by always deriving the key for encryption/decryption from the chunk content, but such a deterministic nature causes information \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "32", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Nicolaou:2022:AAR, author = "Nicolas Nicolaou and Viveck Cadambe and N. 
Prakash and Andria Trigeorgi and Kishori Konwar and Muriel M{\'e}dard and Nancy Lynch", title = "{Ares}: Adaptive, Reconfigurable, Erasure coded, Atomic Storage", journal = j-TOS, volume = "18", number = "4", pages = "33:1--33:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510613", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Dec 20 07:58:36 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3510613", abstract = "Emulating a shared atomic, read/write storage system is a fundamental problem in distributed computing. Replicating atomic objects among a set of data hosts was the norm for traditional implementations (e.g., [ 11 ]) in order to guarantee the availability \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "33", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Lawson:2022:EAS, author = "Margaret Lawson and William Gropp and Jay Lofstead", title = "{EMPRESS}: Accelerating Scientific Discovery through Descriptive Metadata Management", journal = j-TOS, volume = "18", number = "4", pages = "34:1--34:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3523698", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Dec 20 07:58:36 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3523698", abstract = "High-performance computing scientists are producing unprecedented volumes of data that take a long time to load for analysis. However, many analyses only require loading in the data containing particular features of interest and scientists have many \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Storage", articleno = "34", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhou:2022:DFP, author = "Yang Zhou and Fang Wang and Dan Feng", title = "A Disk Failure Prediction Method Based on Active Semi-supervised Learning", journal = j-TOS, volume = "18", number = "4", pages = "35:1--35:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3523699", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Dec 20 07:58:36 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3523699", abstract = "Disk failure has always been a major problem for data centers, leading to data loss. Current disk failure prediction approaches are mostly offline and assume that the disk labels required for training learning models are available and accurate. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "35", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wang:2022:TFS, author = "Rui Wang and Yongkun Li and Yinlong Xu and Hong Xie and John C. S. Lui and Shuibing He", title = "Toward Fast and Scalable Random Walks over Disk-Resident Graphs via Efficient {I/O} Management", journal = j-TOS, volume = "18", number = "4", pages = "36:1--36:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3533579", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Dec 20 07:58:36 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3533579", abstract = "Traditional graph systems mainly use the iteration-based model, which iteratively loads graph blocks into memory for analysis so as to reduce random I/Os. 
However, this iteration-based model limits the efficiency and scalability of running random walk, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "36", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Anonymous:2023:ECM, author = "Anonymous", title = "{Editor-in-Chief} Message", journal = j-TOS, volume = "19", number = "1", pages = "1:1--1:1", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3574325", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3574325", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhang:2023:OCD, author = "Yiming Zhang and Li Wang and Shun Gai and Qiwen Ke and Wenhao Li and Zhenlong Song and Guangtao Xue and Jiwu Shu", title = "{Oasis}: Controlling Data Migration in Expansion of Object-based Storage Systems", journal = j-TOS, volume = "19", number = "1", pages = "2:1--2:22", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568424", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3568424", abstract = "Object-based storage systems have been widely used for various scenarios such as file storage, block storage, blob (e.g., large videos) storage, and so on, where the data is placed among a large number of object storage devices (OSDs). Data placement is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Yang:2023:EEM, author = "Bin Yang and Wei Xue and Tianyu Zhang and Shichao Liu and Xiaosong Ma and Xiyang Wang and Weiguo Liu", title = "End-to-end {I/O} Monitoring on Leading Supercomputers", journal = j-TOS, volume = "19", number = "1", pages = "3:1--3:35", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568425", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/super.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3568425", abstract = "This paper offers a solution to overcome the complexities of production system I/O performance monitoring. We present Beacon, an end-to-end I/O resource monitoring and diagnosis system for the 40960-node Sunway TaihuLight supercomputer, currently the fourth-ranked supercomputer in the world. Beacon simultaneously collects and correlates I/O tracing/profiling data from all the compute nodes, forwarding nodes, storage nodes, and metadata servers. With mechanisms such as aggressive online and offline trace compression and distributed caching/storage, it delivers scalable, low-overhead, and sustainable I/O diagnosis under production use. With Beacon's deployment on TaihuLight for more than three years, we demonstrate Beacon's effectiveness with real-world use cases for I/O performance issue identification and diagnosis. It has already successfully helped center administrators identify obscure design or configuration flaws, system anomaly occurrences, I/O performance interference, and resource under- or over-provisioning problems. Several of the exposed problems have already been fixed, with others being currently addressed. 
Encouraged by Beacon's success in I/O monitoring, we extend it to monitor interconnection networks, which is another contention point on supercomputers. In addition, we demonstrate Beacon's generality by extending it to other supercomputers. Both Beacon codes and part of collected monitoring data are released.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Iliadis:2023:REE, author = "Ilias Iliadis", title = "Reliability Evaluation of Erasure-coded Storage Systems with Latent Errors", journal = j-TOS, volume = "19", number = "1", pages = "4:1--4:47", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568313", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3568313", abstract = "Large-scale storage systems employ erasure-coding redundancy schemes to protect against device failures. The adverse effect of latent sector errors on the Mean Time to Data Loss (MTTDL) and the Expected Annual Fraction of Data Loss (EAFDL) reliability \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Li:2023:EPN, author = "Huaicheng Li and Martin L. Putra and Ronald Shi and Fadhil I. Kurnia and Xing Lin and Jaeyoung Do and Achmad Imam Kistijantoro and Gregory R. Ganger and Haryadi S. 
Gunawi", title = "Extending and Programming the {NVMe} {I/O} Determinism Interface for Flash Arrays", journal = j-TOS, volume = "19", number = "1", pages = "5:1--5:33", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568427", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3568427", abstract = "Predictable latency on flash storage is a long-pursuit goal, yet unpredictability stays due to the unavoidable disturbance from many well-known SSD internal activities. To combat this issue, the recent NVMe IO Determinism (IOD) interface advocates host-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Lin:2023:IID, author = "Lifang Lin and Yuhui Deng and Yi Zhou and Yifeng Zhu", title = "{InDe}: an Inline Data Deduplication Approach via Adaptive Detection of Valid Container Utilization", journal = j-TOS, volume = "19", number = "1", pages = "6:1--6:27", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568426", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3568426", abstract = "Inline deduplication removes redundant data in real-time as data is being sent to the storage system. However, it causes data fragmentation: logically consecutive chunks are physically scattered across various containers after data deduplication. Many \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Liao:2023:ECC, author = "Xiaojian Liao and Youyou Lu and Zhe Yang and Jiwu Shu", title = "Efficient Crash Consistency for {NVMe} over {PCIe} and {RDMA}", journal = j-TOS, volume = "19", number = "1", pages = "7:1--7:35", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568428", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3568428", abstract = "This article presents crash-consistent Non-Volatile Memory Express (ccNVMe), a novel extension of the NVMe that defines how host software communicates with the non-volatile memory (e.g., solid-state drive) across a PCI Express bus and RDMA-capable \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Einziger:2023:BCP, author = "Gil Einziger and Omri Himelbrand and Erez Waisbard", title = "Boosting Cache Performance by Access Time Measurements", journal = j-TOS, volume = "19", number = "1", pages = "8:1--8:29", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572778", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3572778", abstract = "Most modern systems utilize caches to reduce the average data access time and optimize their performance. Recently proposed policies implicitly assume uniform access times, but variable access times naturally appear in domains such as storage, web search, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Akgun:2023:ISS, author = "Ibrahim Umit Akgun and Ali Selman Aydin and Andrew Burford and Michael McNeill and Michael Arkhangelskiy and Erez Zadok", title = "Improving Storage Systems Using Machine Learning", journal = j-TOS, volume = "19", number = "1", pages = "9:1--9:30", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568429", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3568429", abstract = "Operating systems include many heuristic algorithms designed to improve overall storage performance and throughput. Because such heuristics cannot work well for all conditions and workloads, system designers resorted to exposing numerous tunable \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Schindler:2023:ISS, author = "Jiri Schindler and Noa Zilberman", title = "Introduction to the Special Section on {USENIX ATC 2022}", journal = j-TOS, volume = "19", number = "2", pages = "10:1--10:1", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3582557", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3582557", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kwon:2023:RSD, author = "Miryeong Kwon and Seungjun Lee and Hyunkyu Choi and Jooyoung Hwang and Myoungsoo Jung", title = "Realizing Strong Determinism Contract on Log-Structured Merge Key--Value Stores", journal = j-TOS, volume = "19", number = "2", pages = "11:1--11:29", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3582695", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3582695", abstract = "We propose Vigil-KV, a hardware and software co-designed framework that eliminates long-tail latency almost perfectly by introducing strong latency determinism. To make Get latency deterministic, Vigil-KV first enables a predictable latency mode (PLM) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Bergman:2023:ZBY, author = "Shai Bergman and Niklas Cassel and Matias Bj{\o}rling and Mark Silberstein", title = "{ZNSwap}: un-Block your Swap", journal = j-TOS, volume = "19", number = "2", pages = "12:1--12:25", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3582434", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3582434", abstract = "We introduce ZNSwap, a novel swap subsystem optimized for the recent Zoned Namespace (ZNS) SSDs. 
ZNSwap leverages ZNS's explicit control over data management on the drive and introduces a space-efficient host-side Garbage Collector (GC) for swap storage \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Yang:2023:CTE, author = "Tzu-Wei Yang and Seth Pollen and Mustafa Uysal and Arif Merchant and Homer Wolfmeister and Junaid Khalid", title = "{CacheSack}: Theory and Experience of {Google}'s Admission Optimization for Datacenter Flash Caches", journal = j-TOS, volume = "19", number = "2", pages = "13:1--13:24", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3582014", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3582014", abstract = "This article describes the algorithm, implementation, and deployment experience of CacheSack, the admission algorithm for Google datacenter flash caches. CacheSack minimizes the dominant costs of Google's datacenter flash caches: disk IO and flash \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Aguilera:2023:ISS, author = "Marcos K. Aguilera and Hakim Weatherspoon", title = "Introduction to the Special Section on {USENIX OSDI} 2022", journal = j-TOS, volume = "19", number = "2", pages = "14:1--14:1", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3584363", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3584363", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Feng:2023:TUT, author = "Guanyu Feng and Huanqi Cao and Xiaowei Zhu and Bowen Yu and Yuanwei Wang and Zixuan Ma and Shengqi Chen and Wenguang Chen", title = "{TriCache}: a User-Transparent Block Cache Enabling High-Performance Out-of-Core Processing with In-Memory Programs", journal = j-TOS, volume = "19", number = "2", pages = "15:1--15:30", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3583139", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3583139", abstract = "Out-of-core systems rely on high-performance cache sub-systems to reduce the number of I/O operations. Although the page cache in modern operating systems enables transparent access to memory and storage devices, it suffers from efficiency and scalability \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Li:2023:DCA, author = "Jinhong Li and Qiuping Wang and Patrick P. C. Lee and Chao Shi", title = "An In-depth Comparative Analysis of Cloud Block Storage Workloads: Findings and Implications", journal = j-TOS, volume = "19", number = "2", pages = "16:1--16:32", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572779", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3572779", abstract = "Cloud block storage systems support diverse types of applications in modern cloud services. 
Characterizing their input/output (I/O) activities is critical for guiding better system designs and optimizations. In this article, we present an in-depth \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "16", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Yang:2023:PSA, author = "Suli Yang and Jing Liu and Andrea Arpaci-Dusseau and Remzi Arpaci-Dusseau", title = "Principled Schedulability Analysis for Distributed Storage Systems Using Thread Architecture Models", journal = j-TOS, volume = "19", number = "2", pages = "17:1--17:47", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3574323", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3574323", abstract = "In this article, we present an approach to systematically examine the schedulability of distributed storage systems, identify their scheduling problems, and enable effective scheduling in these systems. We use Thread Architecture Models (TAMs) to describe \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "17", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Pang:2023:PCP, author = "Shujie Pang and Yuhui Deng and Genxiong Zhang and Yi Zhou and Yaoqin Huang and Xiao Qin", title = "{PSA-Cache}: a Page-state-aware Cache Scheme for Boosting {$3$D} {NAND} Flash Performance", journal = j-TOS, volume = "19", number = "2", pages = "18:1--18:27", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3574324", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3574324", abstract = "Garbage collection (GC) plays a pivotal role in the performance of 3D NAND flash memory, where Copyback has been widely used to accelerate valid page migration during GC. Unfortunately, copyback is constrained by the parity symmetry issue: data read from \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "18", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{He:2023:FWO, author = "Kewen He and Yujie An and Yijing Luo and Xiaoguang Liu and Gang Wang", title = "{FlatLSM}: Write-Optimized {LSM}-Tree for {PM}-Based {KV} Stores", journal = j-TOS, volume = "19", number = "2", pages = "19:1--19:26", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3579855", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3579855", abstract = "The Log-Structured Merge Tree (LSM-Tree) is widely used in key-value (KV) stores because of its excellent write performance. But LSM-Tree-based KV stores still have the overhead of write-ahead log and write stall caused by slow L$_0$ flush and L$_0$ --- L$_1$ compaction.
New \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "19", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zheng:2023:THP, author = "Shengan Zheng and Morteza Hoseinzadeh and Steven Swanson and Linpeng Huang", title = "{TPFS}: a High-Performance Tiered File System for Persistent Memories and Disks", journal = j-TOS, volume = "19", number = "2", pages = "20:1--20:28", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580280", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Apr 17 12:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3580280", abstract = "Emerging fast, byte-addressable persistent memory (PM) promises substantial storage performance gains compared with traditional disks. We present TPFS, a tiered file system that combines PM and slow disks to create a storage system with near-PM \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "20", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhang:2023:LVA, author = "Ming Zhang and Yu Hua and Pengfei Zuo and Lurong Liu", title = "Localized Validation Accelerates Distributed Transactions on Disaggregated Persistent Memory", journal = j-TOS, volume = "19", number = "3", pages = "21:1--21:35", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3582012", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Aug 10 07:28:36 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3582012", abstract = "Persistent memory (PM) disaggregation significantly improves the resource utilization and failure isolation to build a scalable and cost-effective remote memory pool in modern data centers. 
However, due to offering limited computing power and overlooking \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "21", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Xia:2023:DFL, author = "Wen Xia and Lifeng Pu and Xiangyu Zou and Philip Shilane and Shiyi Li and Haijun Zhang and Xuan Wang", title = "The Design of Fast and Lightweight Resemblance Detection for Efficient Post-Deduplication Delta Compression", journal = j-TOS, volume = "19", number = "3", pages = "22:1--22:30", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3584663", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Aug 10 07:28:36 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3584663", abstract = "Post-deduplication delta compression is a data reduction technique that calculates and stores the differences of very similar but non-duplicate chunks in storage systems, which is able to achieve a very high compression ratio. However, the low throughput \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "22", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Li:2023:PBA, author = "Jiaxin Li and Yiming Zhang and Shan Lu and Haryadi S. 
Gunawi and Xiaohui Gu and Feng Huang and Dongsheng Li", title = "Performance Bug Analysis and Detection for Distributed Storage and Computing Systems", journal = j-TOS, volume = "19", number = "3", pages = "23:1--23:33", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580281", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Aug 10 07:28:36 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3580281", abstract = "This article systematically studies 99 distributed performance bugs from five widely deployed distributed storage and computing systems (Cassandra, HBase, HDFS, Hadoop MapReduce and ZooKeeper). We present the TaxPerf database, which collectively organizes \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "23", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Qin:2023:KRQ, author = "Mian Qin and Qing Zheng and Jason Lee and Bradley Settlemyer and Fei Wen and Narasimha Reddy and Paul Gratz", title = "{KVRangeDB}: Range Queries for a Hash-based Key--Value Device", journal = j-TOS, volume = "19", number = "3", pages = "24:1--24:21", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3582013", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Aug 10 07:28:36 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3582013", abstract = "Key-value (KV) software has proven useful to a wide variety of applications including analytics, time-series databases, and distributed file systems. To satisfy the requirements of diverse workloads, KV stores have been carefully tailored to best match \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "24", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Sha:2023:VGB, author = "Zhibing Sha and Jun Li and Fengxiang Zhang and Min Huang and Zhigang Cai and Fran{\c{c}}ois Trahay and Jianwei Liao", title = "Visibility Graph-based Cache Management for {DRAM} Buffer Inside Solid-state Drives", journal = j-TOS, volume = "19", number = "3", pages = "25:1--25:21", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3586576", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Aug 10 07:28:36 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3586576", abstract = "Most solid-state drives (SSDs) adopt an on-board Dynamic Random Access Memory (DRAM) to buffer the write data, which can significantly reduce the amount of write operations committed to the flash array of SSD if data exhibits locality in write operations. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "25", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Sun:2023:USA, author = "Diansen Sun and Ruixiong Tan and Yunpeng Chai", title = "A Universal {SMR}-aware Cache Framework with Deep Optimization for {DM-SMR} and {HM-SMR} Disks", journal = j-TOS, volume = "19", number = "3", pages = "26:1--26:35", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3588442", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Aug 10 07:28:36 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3588442", abstract = "To satisfy the enormous storage capacities required for big data, data centers have been adopting high-density shingled magnetic recording (SMR) disks.
However, the weak fine-grained random write performance of SMR disks caused by their inherent write \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "26", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Jackowski:2023:DTL, author = "Andrzej Jackowski and Leszek Gryz and Micha{\l} We{\l}nicki and Cezary Dubnicki and Konrad Iwanicki", title = "{Derrick}: a Three-layer Balancer for Self-managed Continuous Scalability", journal = j-TOS, volume = "19", number = "3", pages = "27:1--27:34", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3594543", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Aug 10 07:28:36 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3594543", abstract = "Data arrangement determines the capacity, resilience, and performance of a distributed storage system. A scalable self-managed system must place its data efficiently not only during stable operation but also after an expansion, planned downscaling, or \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "27", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wu:2023:CBM, author = "Haonan Wu and Shuxian Wang and Zhanfeng Jin and Yuhang Zhang and Ruyun Ma and Sijin Fan and Ruili Chao", title = "{CostCounter}: a Better Method for Collision Mitigation in Cuckoo Hashing", journal = j-TOS, volume = "19", number = "3", pages = "28:1--28:24", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3596910", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Aug 10 07:28:36 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3596910", abstract = "Hardware is often required to support fast search and high-throughput applications. Consequently, the performance of search algorithms is limited by storage bandwidth. Hence, the search algorithm must be optimized accordingly. We propose a CostCounter (CC) algorithm based on cuckoo hashing and an Improved CostCounter (ICC) algorithm. A better path can be selected when collisions occur using a cost counter to record the kick-out situation. Our simulation results indicate that the CC and ICC algorithms can achieve more significant performance improvements than Random Walk (RW), Breadth First Search (BFS), and MinCounter (MC). With two buckets and two slots per bucket, under the 95\% memory load rate of the maximum load rate, CC and ICC are optimized on read-write times over 20\% and 80\% compared to MC and BFS, respectively. Furthermore, the CC and ICC algorithms achieve a slight improvement in storage efficiency compared with MC. In addition, we implement RW, MC, and the proposed algorithms using fine-grained locking to support a high throughput rate. 
From the test on field programmable gate arrays, we verify the simulation results and our algorithms optimize the maximum throughput over 23\% compared to RW and 9\% compared to MC under 95\% of the memory capacity. The test results indicate that our CC and ICC algorithms can achieve better performance in terms of hardware bandwidth and memory load efficiency without incurring a significant resource cost.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "28", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Goel:2023:ISS, author = "Ashvin Goel and Dalit Naor", title = "Introduction to the Special Section on {USENIX FAST 2023}", journal = j-TOS, volume = "19", number = "4", pages = "29:1--29:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3612820", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Nov 16 05:44:56 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3612820", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "29", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Li:2023:HPR, author = "Pengfei Li and Yu Hua and Pengfei Zuo and Zhangyu Chen and Jiajie Sheng", title = "A High-performance {RDMA}-oriented Learned Key-value Store for Disaggregated Memory Systems", journal = j-TOS, volume = "19", number = "4", pages = "30:1--30:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3620674", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Nov 16 05:44:56 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3620674", abstract = "Disaggregated memory systems separate monolithic servers into different components, including compute and memory nodes, to enjoy the benefits of high resource utilization, flexible hardware scalability, and efficient data sharing. By exploiting the high-. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "30", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kadekodi:2023:PDC, author = "Saurabh Kadekodi and Shashwat Silas and David Clausen and Arif Merchant", title = "Practical Design Considerations for Wide Locally Recoverable Codes {(LRCs)}", journal = j-TOS, volume = "19", number = "4", pages = "31:1--31:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3626198", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Nov 16 05:44:56 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3626198", abstract = "Most of the data in large-scale storage clusters is erasure coded. At exascale, optimizing erasure codes for low storage overhead, efficient reconstruction, and easy deployment is of critical importance. 
Locally recoverable codes (LRCs) have deservedly \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "31", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kim:2023:ESS, author = "Sang-Hoon Kim and Jaehoon Shim and Euidong Lee and Seongyeop Jeong and Ilkueon Kang and Jin-Soo Kim", title = "Empowering Storage Systems Research with {NVMeVirt}: a Comprehensive {NVMe} Device Emulator", journal = j-TOS, volume = "19", number = "4", pages = "32:1--32:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3625006", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Nov 16 05:44:56 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3625006", abstract = "There have been drastic changes in the storage device landscape recently. At the center of the diverse storage landscape lies the NVMe interface, which allows high-performance and flexible communication models required by these next-generation device \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "32", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Lu:2023:MMJ, author = "Ruiming Lu and Erci Xu and Yiming Zhang and Fengyi Zhu and Zhaosheng Zhu and Mengtian Wang and Zongpeng Zhu and Guangtao Xue and Jiwu Shu and Minglu Li and Jiesheng Wu", title = "From Missteps to Milestones: a Journey to Practical Fail-Slow Detection", journal = j-TOS, volume = "19", number = "4", pages = "33:1--33:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3617690", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Nov 16 05:44:56 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3617690", abstract = "The newly emerging ``fail-slow'' failures plague both software and hardware where the victim components are still functioning yet with degraded performance. To address this problem, this article presents Perseus, a practical fail-slow detection framework \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "33", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Sun:2023:SWF, author = "Jinghan Sun and Shaobo Li and Jun Xu and Jian Huang", title = "The Security War in File Systems: an Empirical Study from a Vulnerability-centric Perspective", journal = j-TOS, volume = "19", number = "4", pages = "34:1--34:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3606020", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Nov 16 05:44:56 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3606020", abstract = "This article presents a systematic study on the security of modern file systems, following a vulnerability-centric perspective. 
Specifically, we collected 377 file system vulnerabilities committed to the CVE database in the past 20 years. We characterize \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "34", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhang:2023:HBS, author = "Yiming Zhang and Huiba Li and Shengyun Liu and Peng Huang", title = "Hybrid Block Storage for Efficient Cloud Volume Service", journal = j-TOS, volume = "19", number = "4", pages = "35:1--35:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3596446", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Nov 16 05:44:56 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3596446", abstract = "The migration of traditional desktop and server applications to the cloud brings challenge of high performance, high reliability, and low cost to the underlying cloud storage. To satisfy the requirement, this article proposes a hybrid cloud-scale block \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "35", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Gatla:2023:UPM, author = "Om Rameshwar Gatla and Duo Zhang and Wei Xu and Mai Zheng", title = "Understanding Persistent-memory-related Issues in the {Linux} Kernel", journal = j-TOS, volume = "19", number = "4", pages = "36:1--36:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3605946", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Nov 16 05:44:56 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", URL = "https://dl.acm.org/doi/10.1145/3605946", abstract = "Persistent memory (PM) technologies have inspired a wide range of PM-based system optimizations. However, building correct PM-based systems is difficult due to the unique characteristics of PM hardware. To better understand the challenges as well as the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "36", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wu:2023:FFD, author = "Suzhen Wu and Zhanhong Tu and Yuxuan Zhou and Zuocheng Wang and Zhirong Shen and Wei Chen and Wei Wang and Weichun Wang and Bo Mao", title = "{FASTSync}: a {FAST} Delta Sync Scheme for Encrypted Cloud Storage in High-bandwidth Network Environments", journal = j-TOS, volume = "19", number = "4", pages = "37:1--37:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3607536", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Nov 16 05:44:56 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3607536", abstract = "More and more data are stored in cloud storage, which brings two major challenges. 
First, the modified files in the cloud should be quickly synchronized to ensure data consistency, e.g., delta synchronization (sync) achieves efficient cloud sync by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "37", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Tong:2023:OFD, author = "Qiuyun Tong and Xinghua Li and Yinbin Miao and Yunwei Wang and Ximeng Liu and Robert H. Deng", title = "Owner-free Distributed Symmetric Searchable Encryption Supporting Conjunctive Queries", journal = j-TOS, volume = "19", number = "4", pages = "38:1--38:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3607255", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Nov 16 05:44:56 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3607255", abstract = "Symmetric Searchable Encryption (SSE), as an ideal primitive, can ensure data privacy while supporting retrieval over encrypted data. However, existing multi-user SSE schemes require the data owner to share the secret key with all query users or always be \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "38", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Li:2024:BLI, author = "Huiba Li and Zhihao Zhang and Yifan Yuan and Rui Du and Kai Ma and Lanzheng Liu and Yiming Zhang and Windsor Hsu", title = "Block-level Image Service for the Cloud", journal = j-TOS, volume = "20", number = "1", pages = "1:1--1:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3620672", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 23 16:50:38 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "https://dl.acm.org/doi/10.1145/3620672", abstract = "Businesses increasingly need agile and elastic computing infrastructure to respond quickly to real-world situations. By offering efficient process-based virtualization and a layered image system, containers are designed to enable agile and elastic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Cai:2024:EFN, author = "Miao Cai and Junru Shen and Bin Tang and Hao Huang and Baoliu Ye", title = "Exploiting Flat Namespace to Improve File System Metadata Performance on Ultra-Fast, Byte-Addressable {NVMs}", journal = j-TOS, volume = "20", number = "1", pages = "2:1--2:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3620673", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 23 16:50:38 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3620673", abstract = "The conventional file system provides a hierarchical namespace by structuring it as a directory tree. 
Tree-based namespace structure leads to inefficient file path walk and expensive namespace tree traversal, underutilizing ultra-low access latency and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Xu:2024:SWL, author = "Wang Xu and Israel Koren", title = "A Scalable Wear Leveling Technique for Phase Change Memory", journal = j-TOS, volume = "20", number = "1", pages = "3:1--3:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3631146", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 23 16:50:38 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3631146", abstract = "Phase Change Memory (PCM), one of the recently proposed non-volatile memory technologies, has been suffering from low write endurance. For example, a single-layer PCM cell could only be written approximately $ 10^8 $ times. This limits the lifetime of a PCM-based \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kim:2024:LTA, author = "Donguk Kim and Jongsung Lee and Keun Soo Lim and Jun Heo and Tae Jun Ham and Jae W.
Lee", title = "An {LSM} Tree Augmented with {B+} Tree on Nonvolatile Memory", journal = j-TOS, volume = "20", number = "1", pages = "4:1--4:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3633475", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 23 16:50:38 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3633475", abstract = "Modern log-structured merge (LSM) tree-based key-value stores are widely used to process update-heavy workloads effectively as the LSM tree sequentializes write requests to a storage device to maximize storage performance. However, this append-only \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Sun:2024:GUG, author = "Hui Sun and Jinfeng Xu and Xiangxiang Jiang and Guanzhong Chen and Yinliang Yue and Xiao Qin", title = "{gLSM}: Using {GPGPU} to Accelerate Compactions in {LSM}-tree-based Key-value Stores", journal = j-TOS, volume = "20", number = "1", pages = "5:1--5:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3633782", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 23 16:50:38 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3633782", abstract = "Log-structured-merge tree or LSM-tree is a technological underpinning in key-value (KV) stores to support a wide range of performance-critical applications. By conducting data re-organization in the background by virtue of compaction operations, the KV \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wang:2024:EEP, author = "Shucheng Wang and Qiang Cao and Hong Jiang and Ziyi Lu and Jie Yao and Yuxing Chen and Anqun Pan", title = "Explorations and Exploitation for Parity-based {RAIDs} with Ultra-fast {SSDs}", journal = j-TOS, volume = "20", number = "1", pages = "6:1--6:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3627992", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri Feb 23 16:50:38 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3627992", abstract = "Following a conventional design principle that pays more fast-CPU-cycles for fewer slow-I/Os, popular software storage architecture Linux Multiple-Disk (MD) for parity-based RAID (e.g., RAID5 and RAID6) assigns one or more centralized worker threads to efficiently process all user requests based on multi-stage asynchronous control and global data structures, successfully exploiting characteristics of slow devices, e.g., Hard Disk Drives (HDDs). However, we observe that, with high-performance NVMe-based Solid State Drives (SSDs), even the recently added multi-worker processing mode in MD achieves only limited performance gain because of the severe lock contentions under intensive write workloads. In this paper, we propose a novel stripe-threaded RAID architecture, StRAID, assigning a dedicated worker thread for each stripe-write (one-for-one model) to sufficiently exploit high parallelism inherent among RAID stripes, multi-core processors, and SSDs. For the notoriously performance-punishing partial-stripe writes that induce extra read and write I/Os, StRAID presents a two-stage stripe write mechanism and a two-dimensional multi-log SSD buffer. 
All writes first are opportunistically batched in memory, and then are written into the primary RAID for aggregated full-stripe writes or conditionally redirected to the buffer for partial-stripe writes. These buffered data are strategically reclaimed to the primary RAID. We evaluate a StRAID prototype with a variety of benchmarks and real-world traces. StRAID is demonstrated to outperform MD by up to 5.8 times in write throughput.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Williams:2024:ISS, author = "Dan Williams and Julia Lawall", title = "Introduction to the Special Section on {USENIX ATC 2023}", journal = j-TOS, volume = "20", number = "2", pages = "7:1--7:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3635156", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri May 10 08:18:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3635156", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhou:2024:CAC, author = "Yuanhui Zhou and Jian Zhou and Kai Lu and Ling Zhan and Peng Xu and Peng Wu and Shuning Chen and Xian Liu and Jiguang Wan", title = "A Contract-aware and Cost-effective {LSM} Store for Cloud Storage with Low Latency Spikes", journal = j-TOS, volume = "20", number = "2", pages = "8:1--8:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643851", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri May 10 08:18:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3643851", abstract = "Cloud storage is gaining popularity because features such as pay-as-you-go significantly reduce storage costs. However, the community has not sufficiently explored its contract model and latency characteristics. As LSM-Tree-based key-value stores (LSM \ldots{})", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wang:2024:PSI, author = "Jing Wang and Youyou Lu and Qing Wang and Yuhao Zhang and Jiwu Shu", title = "{Perseid}: a Secondary Indexing Mechanism for {LSM}-Based Storage Systems", journal = j-TOS, volume = "20", number = "2", pages = "9:1--9:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3633285", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri May 10 08:18:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3633285", abstract = "LSM-based storage systems are widely used for superior write performance on block devices. 
However, they currently fail to efficiently support secondary indexing, since a secondary index query operation usually needs to retrieve multiple small values, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Jang:2024:BSH, author = "Junhyeok Jang and Hanjin Choi and Hanyeoreum Bae and Seungjun Lee and Miryeong Kwon and Myoungsoo Jung", title = "Bridging Software-Hardware for {CXL} Memory Disaggregation in Billion-Scale Nearest Neighbor Search", journal = j-TOS, volume = "20", number = "2", pages = "10:1--10:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3639471", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri May 10 08:18:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3639471", abstract = "We propose CXL-ANNS, a software-hardware collaborative approach to enable scalable approximate nearest neighbor search (ANNS) services. To this end, we first disaggregate DRAM from the host via compute express link (CXL) and place all essential datasets \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Paul:2024:TAE, author = "Arnab K. Paul and Sarah Neuwirth and Bharti Wadhwa and Feiyi Wang and Sarp Oral and Ali R. 
Butt", title = "{Tarazu}: an Adaptive End-to-end {I/O} Load-balancing Framework for Large-scale Parallel File Systems", journal = j-TOS, volume = "20", number = "2", pages = "11:1--11:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3641885", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri May 10 08:18:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3641885", abstract = "The imbalanced I/O load on large parallel file systems affects the parallel I/O performance of high-performance computing (HPC) applications. One of the main reasons for I/O imbalances is the lack of a global view of system-wide resource consumption. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wei:2024:EDP, author = "Junyu Wei and Guangyan Zhang and Junchao Chen and Yang Wang and Weimin Zheng and Tingtao Sun and Jiesheng Wu and Jiangwei Jiang", title = "Exploiting Data-pattern-aware Vertical Partitioning to Achieve Fast and Low-cost Cloud Log Storage", journal = j-TOS, volume = "20", number = "2", pages = "12:1--12:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643641", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri May 10 08:18:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3643641", abstract = "Cloud logs can be categorized into on-line, off-line, and near-line logs based on the access frequency. Among them, near-line logs are mainly used for debugging, which means they prefer a low query latency for better user experience. Besides, the storage \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wu:2024:PSB, author = "Jiaojiao Wu and Zhigang Cai and Fan Yang and Jun Li and Fran{\c{c}}ois Trahay and Zheng Yang and Chao Wang and Jianwei Liao", title = "Polling Sanitization to Balance {I/O} Latency and Data Security of High-density {SSDs}", journal = j-TOS, volume = "20", number = "2", pages = "13:1--13:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3639826", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Fri May 10 08:18:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3639826", abstract = "Sanitization is an effective approach for ensuring data security through scrubbing invalid but sensitive data pages, with the cost of impacts on storage performance due to moving out valid pages from the sanitization-required wordline, which is a logical read/write unit and consists of multiple pages in high-density SSDs. To minimize the impacts on I/O latency and data security, this article proposes a polling-based scheduling approach for data sanitization in high-density SSDs. Our method polls a specific SSD channel for completing data sanitization at the block granularity, meanwhile other channels can still service I/O requests. Furthermore, our method assigns a low priority to the blocks that are more likely to have future adjacent page invalidations inside sanitization-required wordlines, while selecting the sanitization block, to minimize the negative impacts of moving valid pages.
Through a series of emulation experiments on several disk traces of real-world applications, we show that our proposal can decrease the negative effects of data sanitization in terms of the risk-performance index, which is a united time metric of I/O responsiveness and the unsafe time interval, by 16.34\%, on average, compared to related sanitization methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Geambasu:2024:ISS, author = "Roxana Geambasu and Ed Nightingale", title = "Introduction to the Special Section on {USENIX OSDI 2023}", journal = j-TOS, volume = "20", number = "3", pages = "14:1--14:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3654801", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Jun 18 05:40:10 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3654801", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Luo:2024:MDR, author = "Xuchuan Luo and Pengfei Zuo and Jiacheng Shen and Jiazhen Gu and Xin Wang and Michael Lyu and Yangfan Zhou", title = "A Memory-Disaggregated Radix Tree", journal = j-TOS, volume = "20", number = "3", pages = "15:1--15:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3664289", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Jun 18 05:40:10 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3664289", abstract = "Disaggregated memory (DM) is an increasingly prevalent architecture with high resource utilization. 
It separates computing and memory resources into two pools and interconnects them with fast networks. Existing range indexes on DM are based on B+ trees, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Min:2024:EEZ, author = "Jaehong Min and Chenxingyu Zhao and Ming Liu and Arvind Krishnamurthy", title = "{eZNS}: Elastic Zoned Namespace for Enhanced Performance Isolation and Device Utilization", journal = j-TOS, volume = "20", number = "3", pages = "16:1--16:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3653716", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Jun 18 05:40:10 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3653716", abstract = "Emerging Zoned Namespace (ZNS) SSDs, providing the coarse-grained zone abstraction, hold the potential to significantly enhance the cost efficiency of future storage infrastructure and mitigate performance unpredictability. However, existing ZNS SSDs have \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "16", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Li:2024:LEA, author = "Chenxing Li and Sidi Mohamed Beillahi and Guang Yang and Ming Wu and Wei Xu and Fan Long", title = "{LVMT}: an Efficient Authenticated Storage for Blockchain", journal = j-TOS, volume = "20", number = "3", pages = "17:1--17:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3664818", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Jun 18 05:40:10 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3664818", abstract = "Authenticated storage access is the performance bottleneck of a blockchain, because each access can be amplified to potentially O (log n ) disk I/O operations in the standard Merkle Patricia Trie (MPT) storage structure. In this article, we propose a multi-. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "17", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhao:2024:EEH, author = "Nannan Zhao and Muhui Lin and Hadeel Albahar and Arnab K. 
Paul and Zhijie Huan and Subil Abraham and Keren Chen and Vasily Tarasov and Dimitrios Skourtis and Ali Anwar and Ali Butt", title = "An End-to-end High-performance Deduplication Scheme for {Docker} Registries and {Docker} Container Storage Systems", journal = j-TOS, volume = "20", number = "3", pages = "18:1--18:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643819", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Jun 18 05:40:10 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3643819", abstract = "The wide adoption of Docker containers for supporting agile and elastic enterprise applications has led to a broad proliferation of container images. The associated storage performance and capacity requirements place a high pressure on the infrastructure \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "18", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Li:2024:FCS, author = "Jiahao Li and Jingbo Su and Luofan Chen and Cheng Li and Kai Zhang and Liang Yang and Sam Noh and Yinlong Xu", title = "{Fastmove}: a Comprehensive Study of On-Chip {DMA} and its Demonstration for Accelerating Data Movement in {NVM}-based Storage Systems", journal = j-TOS, volume = "20", number = "3", pages = "19:1--19:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3656477", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Jun 18 05:40:10 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3656477", abstract = "Data-intensive applications executing on NVM-based storage systems experience serious bottlenecks when moving data between DRAM and NVM. 
We advocate for the use of the long-existing but recently neglected on-chip DMA to expedite data movement with three \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "19", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Stylianakis:2024:ISE, author = "Giorgos Stylianakis and Giorgos Saloustros and Orestis Chiotakis and Giorgos Xanthakis and Angelos Bilas", title = "Index Shipping for Efficient Replication in {LSM} Key--Value Stores with Hybrid {KV} Placement", journal = j-TOS, volume = "20", number = "3", pages = "20:1--20:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3658672", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Jun 18 05:40:10 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3658672", abstract = "Key-value (KV) stores based on the LSM tree have become a foundational layer in the storage stack of datacenters and cloud services. Current approaches for achieving reliability and availability favor reducing network traffic and send to replicas only new \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "20", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhang:2024:DID, author = "Tiangmeng Zhang and Renhui Chen and Zijing Li and Congming Gao and Chengke Wang and Jiwu Shu", title = "Design and Implementation of Deduplication on {F2FS}", journal = j-TOS, volume = "20", number = "4", pages = "21:1--21:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3662735", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Aug 20 06:19:19 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3662735", abstract = "Data deduplication technology has gained popularity in modern file systems due to its ability to eliminate redundant writes and improve storage space efficiency. In recent years, the flash-friendly file system (F2FS) has been widely adopted in flash \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "21", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Du:2024:FFA, author = "Chunfeng Du and Zihang Lin and Suzhen Wu and Yifei Chen and Jiapeng Wu and Shengzhe Wang and Weichun Wang and Qingfeng Wu and Bo Mao", title = "{FSDedup}: Feature-Aware and Selective Deduplication for Improving Performance of Encrypted Non-Volatile Main Memory", journal = j-TOS, volume = "20", number = "4", pages = "22:1--22:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3662736", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Aug 20 06:19:19 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3662736", abstract = "Enhancing the endurance, performance, and energy efficiency of encrypted Non-Volatile Main Memory (NVMM) can be achieved by minimizing written data through inline deduplication. 
However, existing approaches applying inline deduplication to encrypted NVMM \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "22", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Tan:2024:DFD, author = "Haoliang Tan and Wen Xia and Xiangyu Zou and Cai Deng and Qing Liao and Zhaoquan Gu", title = "The Design of Fast Delta Encoding for Delta Compression Based Storage Systems", journal = j-TOS, volume = "20", number = "4", pages = "23:1--23:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3664817", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Aug 20 06:19:19 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3664817", abstract = "Delta encoding is a data reduction technique capable of calculating the differences (i.e., delta) among very similar files and chunks. It is widely used for various applications, such as synchronization replication, backup/archival storage, cache \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "23", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Raaf:2024:SBH, author = "Patrick Raaf and Andr{\'e} Brinkmann and Eric Borba and Hossein Asadi and Sai Narasimhamurthy and John Bent and Mohamad El-Batal and Reza Salkhordeh", title = "From {SSDs} Back to {HDDs}: Optimizing {VDO} to Support Inline Deduplication and Compression for {HDDs} as Primary Storage Media", journal = j-TOS, volume = "20", number = "4", pages = "24:1--24:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3678250", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Aug 20 06:19:19 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3678250", abstract = "Deduplication and compression are powerful techniques to reduce the ratio between the quantity of logical data stored and the physical amount of consumed storage. Deduplication can impose significant performance overheads, as duplicate detection for large \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "24", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Chun:2024:RIS, author = "Myoungjun Chun and Myungsuk Kim and Dusol Lee and Jisung Park and Jihong Kim", title = "{ReadGuard}: Integrated {SSD} Management for Priority-Aware Read Performance Differentiation", journal = j-TOS, volume = "20", number = "4", pages = "25:1--25:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3676884", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Aug 20 06:19:19 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3676884", abstract = "When multiple apps with different I/O priorities share a high-performance SSD, it is important to differentiate the I/O QoS level based on the I/O priority of each app. In this paper, we study how a modern flash-based SSD should be designed to support \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "25", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Yao:2024:ECS, author = "Xiangyu Yao and Qiao Li and Kaihuan Lin and Xinbiao Gan and Jie Zhang and Congming Gao and Zhirong Shen and Quanqing Xu and Chuanhui Yang and Jason Xue", title = "Extremely-Compressed {SSDs} with {I/O} Behavior Prediction", journal = j-TOS, volume = "20", number = "4", pages = "26:1--26:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3677044", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Aug 20 06:19:19 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3677044", abstract = "As the data volume continues to grow exponentially, there is an increasing demand for large storage system capacity. 
Data compression techniques effectively reduce the volume of written data, enhancing space efficiency. As a result, many modern SSDs have already incorporated data compression capabilities. However, data compression introduces additional processing overhead in critical I/O paths, potentially affecting system performance. Currently, most compression solutions in flash-based storage systems employ fixed compression algorithms for all incoming data without leveraging differences among various data access patterns. This leads to sub-optimal compression efficiency.\par This article proposes a data-type-aware Flash Translation Layer (DAFTL) scheme to maximize space efficiency without compromising system performance. First, we propose an I/O behavior prediction method to forecast future access on specific data. Then, DAFTL matches data types with distinct I/O behaviors to compression algorithms of varying intensities, achieving an optimal balance between performance and space efficiency. Specifically, it employs higher-intensity compression algorithms for less frequently accessed data to maximize space efficiency. For frequently accessed data, it utilizes lower-intensity but faster compression algorithms to maintain system performance. Finally, an improved compact compression method is proposed to effectively eliminate page fragmentation and further enhance space efficiency. Extensive evaluations using a variety of real-world workloads, as well as the workloads with real data we collected on our platforms, demonstrate that DAFTL achieves more data reductions than other approaches. When compared to the state-of-the-art compression schemes, DAFTL reduces the total number of pages written to the SSD by an average of 8\%, 21.3\%, and 25.6\% for data with high, medium, and low compressibility, respectively. In the case of workloads with real data, DAFTL achieves an average reduction of 10.4\% in the total number of pages written to SSD. 
Furthermore, DAFTL exhibits comparable or even improved read and write performance compared to other solutions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "26", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhao:2024:EDR, author = "Jia Zhao and Zuoru Yang and Jingwei Li and Patrick P. C. Lee", title = "Encrypted Data Reduction: Removing Redundancy from Encrypted Data in Outsourced Storage", journal = j-TOS, volume = "20", number = "4", pages = "27:1--27:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3685278", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Aug 20 06:19:19 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3685278", abstract = "Storage savings and data confidentiality are two primary goals for outsourced storage. However, encryption by design destroys the content redundancy within plaintext data, so there exist design tensions when combining encryption with data reduction \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "27", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kuenning:2025:PPFa, author = "Geoff Kuenning and Youjip Won and Ming Zhao and Erez Zadok", title = "The Past, Present, and Future of Storage Technologies (Part 1 of 2)", journal = j-TOS, volume = "21", number = "1", pages = "1:1--1:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3709140", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 13 06:13:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", note = "See also part 2 \cite{Kuenning:2025:PPFb}.", URL = "https://dl.acm.org/doi/10.1145/3709140", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhang:2025:SAS, author = "Xiangqun Zhang and Janki Bhimani and Shuyi Pei and Eunji Lee and Sungjin Lee and Yoon Jae Seong and Eui Jin Kim and Changho Choi and Eyee Hyun Nam and Jongmoo Choi and Bryan S. Kim", title = "Storage Abstractions for {SSDs}: The Past, Present, and Future", journal = j-TOS, volume = "21", number = "1", pages = "2:1--2:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708992", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 13 06:13:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3708992", abstract = "This article traces the evolution of SSD (solid-state drive) interfaces, examining the transition from the block storage paradigm inherited from hard disk drives to SSD-specific standards customized to flash memory. Early SSDs conformed to the block \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Cheriere:2025:HSC, author = "Nathana{\"e}l Cheriere and Jiaqi Chu and Grace Brennan and Pashmina Cameron and Pedro {Da Costa} and Jannes Gladrow and Guilherme Ilunga and Douglas Kelly and Sarah Lewis and Joowon Lim and Giorgio Maltese and Tony Mason and Greg O'Shea and Soujanya Ponnapalli and Michael Rudow and Alan Sanders and Theano Stavrinos and Xingbo Wu and Mengyang Yang and Dushyanth Narayanan and Benn Thomsen and Antony Rowstron", title = "Holographic Storage for the Cloud: advances and challenges", journal = j-TOS, volume = "21", number = "1", pages = "3:1--3:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708993", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 13 06:13:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3708993", abstract = "Holographic Storage is an old idea that has always promised high density and fast random access, but has never been commercially competitive with Hard Disk Drives (HDDs) and Solid State Devices (SSDs). In Project HSD at Microsoft Research we asked the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Shen:2025:SPP, author = "Zhirong Shen and Yuhui Cai and Keyun Cheng and Patrick P. C. 
Lee and Xiaolu Li and Yuchong Hu and Jiwu Shu", title = "A Survey of the Past, Present, and Future of Erasure Coding for Storage Systems", journal = j-TOS, volume = "21", number = "1", pages = "4:1--4:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708994", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 13 06:13:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3708994", abstract = "Erasure coding is a known redundancy technique that has been popularly deployed in modern storage systems to protect against failures. By introducing a small portion of coded redundancy into data storage, erasure coding is shown to provide higher \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Li:2025:FOC, author = "Zhiyue Li and Guangyan Zhang and Yang Wang", title = "Flash-oriented Coded Storage: Research Status and Future Directions", journal = j-TOS, volume = "21", number = "1", pages = "5:1--5:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708995", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 13 06:13:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3708995", abstract = "Flash-based solid-state drives (SSDs) have been widely adopted in various storage systems, manifesting better performance than their forerunner HDDs. However, the characteristics of flash media post some drawbacks when deploying SSD-based storage systems. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Lantz:2025:MTS, author = "Mark A. 
Lantz and Simeon Furrer and Martin Petermann and Hugo Rothuizen and Stella Brach and Luzius Kronig and Ilias Iliadis and Beat Weiss and Ed R. Childers and David Pease", title = "Magnetic Tape Storage Technology", journal = j-TOS, volume = "21", number = "1", pages = "6:1--6:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708997", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 13 06:13:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3708997", abstract = "Magnetic tape provides a cost-effective way to retain the exponentially increasing volumes of data being created in recent years. The low cost per terabyte combined with tape's low energy consumption make it an appealing option for storing infrequently \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Anderson:2025:PST, author = "Patrick Anderson and Erika Aranas and Youssef Assaf and Raphael Behrendt and Richard Black and Marco Caballero and Pashmina Cameron and Burcu Canakci and Andromachi Chatzieleftheriou and Rebekah Clarke and James Clegg and Daniel Cletheroe and Bridgette Cooper and Thales {De Carvalho} and Tim Deegan and Austin Donnelly and Rokas Drevinskas and Alexander Gaunt and Christos Gkantsidis and Ariel Gomez Diaz and Istvan Haller and Freddie Hong and Teodora Ilieva and Shashidhar Joshi and Russell Joyce and William Kunkel and David Lara and Sergey Legtchenko and Fanglin Liu and Bruno Magalhaes and Alana Marzoev and Marvin McNett and Jayashree Mohan and Michael Myrah and Truong Nguyen and Sebastian Nowozin and Aaron Ogus and Hiske Overweg and Antony Rowstron and Maneesh Sah and Masaaki Sakakura and Peter Scholtz and Nina Schreiner and Omer Sella and Adam Smith and Ioan Stefanovici and David Sweeney and Benn Thomsen and Govert 
Verkes and Phil Wainman and Jonathan Westcott and Luke Weston and Charles Whittaker and Pablo Wilke Berenguer and Hugh Williams and Thomas Winkler and Stefan Winzeck", title = "{Project Silica}: Towards Sustainable Cloud Archival Storage in Glass", journal = j-TOS, volume = "21", number = "1", pages = "7:1--7:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708996", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Feb 13 06:13:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3708996", abstract = "Sustainable and cost-effective long-term storage remains an unsolved problem. The most widely used storage technologies today are magnetic (hard disk drives and tape). They use media that degrades over time and has a limited lifetime, which leads to inefficient, wasteful, and costly solutions for long-lived data. This article presents Silica: the first cloud storage system for archival data underpinned by quartz glass, an extremely resilient media that allows data to be left in situ indefinitely. The hardware and software of Silica have been co-designed and co-optimized from the media up to the service level with sustainability as a primary objective. The design follows a cloud-first, data-driven methodology underpinned by principles derived from analyzing the archival workload of a large public cloud service. Silica can support a wide range of archival storage workloads and ushers in a new era of sustainable, cost-effective storage.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Ma:2025:ISS, author = "Xiaosong Ma and Youjip Won", title = "Introduction to the Special Section on {USENIX FAST 2024}", journal = j-TOS, volume = "21", number = "2", pages = "8:1--8:??", year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3716633", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 25 09:37:45 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3716633", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Yang:2025:EER, author = "Zhe Yang and Qing Wang and Xiaojian Liao and Youyou Lu and Keji Huang and Jiwu Shu", title = "Efficiently Enlarging {RDMA}-Attached Memory with {SSD}", journal = j-TOS, volume = "21", number = "2", pages = "9:1--9:??", year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3700772", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 25 09:37:45 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3700772", abstract = "RDMA-based in-memory storage systems offer high performance but are restricted by the capacity of physical memory. In this article, we propose TeRM to extend RDMA-attached memory with SSD. TeRM achieves fast remote access on the SSD-extended memory by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Storage", articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Xu:2025:ECB, author = "Erci Xu and Weidong Zhang and Qiuping Wang and Xiaolu Zhang and Yuesheng Gu and Zhenwei Lu and Tao Ouyang and Guanqun Dong and Wenwen Peng and Zhe Xu and Shuo Zhang and Dong Wu and Yilei Peng and Tianyun Wang and Haoran Zhang and Jiasheng Wang and Wenyuan Yan and Yuanyuan Dong and Wenhui Yao and Zhongjie Wu and Lingjun Zhu and Chao Shi and Yinhu Wang and Rong Liu and Junping Wu and Jiaji Zhu and Jiesheng Wu", title = "Evolving the Cloud Block Store with Performance, Elasticity, Availability, and Hardware Offloading", journal = j-TOS, volume = "21", number = "2", pages = "10:1--10:??", year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3705925", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 25 09:37:45 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3705925", abstract = "In this paper, we qualitatively and quantitatively discuss the design choices, production experience, and lessons in building the Elastic Block Storage (EBS) at Alibaba Cloud over the past decade. To cope with hardware advancement and users' demands, we \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Storage", articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Yang:2025:LDP, author = "Tsun-Yu Yang and Yizou Chen and Yuhong Liang and Ming-Chang Yang", title = "Leveraging On-demand Processing to Co-optimize Scalability and Efficiency for Fully-external Graph Computation", journal = j-TOS, volume = "21", number = "2", pages = "11:1--11:??", year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3701037", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 25 09:37:45 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3701037", abstract = "Fully-external graph computation systems exhibit optimal scalability by computing the ever-growing, large-scale graph with a constant amount of memory on a single machine. In particular, they keep the entire massive graph data in storage and iteratively \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "11", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wertenbroek:2025:PLB, author = "Rick Wertenbroek and Yann Thoma and Alberto Dassatti", title = "A Portable {Linux}-based Firmware for {NVMe} Computational Storage Devices", journal = j-TOS, volume = "21", number = "2", pages = "12:1--12:??", year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3697352", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 25 09:37:45 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", URL = "https://dl.acm.org/doi/10.1145/3697352", abstract = "Over the years, interest in computational storage devices has been growing steadily. 
This is largely due to the rise of data-intensive applications, such as machine learning, online video distribution, astrophysics, and genomics. Moving compute operations closer to the data provides benefits in terms of scaling possibilities and energy efficiency. The development of computational storage devices has been limited by the need for specialized and complex hardware. In this work, we propose a portable Linux-based firmware framework for the development of NVMe computational storage devices. Our firmware runs on a variety of hardware platforms ranging from expensive FPGA solutions to inexpensive off-the-shelf single board computers. The firmware leverages the vast Linux software ecosystem to facilitate the development and prototyping of novel computational storage devices. We benchmark our firmware on multiple hardware platforms and demonstrate its versatility through several computational examples including a content-aware disk image search engine based on natural language processing and AI-driven image recognition.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "12", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Liu:2025:WWA, author = "Yubo Liu and Yongfeng Wang and Zhiguang Chen and Yutong Lu and Ming Zhao", title = "{WALSH}: Write-Aggregating Log-Structured Hashing for Hybrid Memory", journal = j-TOS, volume = "21", number = "2", pages = "13:1--13:??", year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715010", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 25 09:37:45 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3715010", abstract = "Persistent memory (PM) brings important opportunities for improving data storage including the widely used hash tables. 
However, PM is not friendly to small writes, which causes existing PM hashes to suffer from high hardware write amplification. Hybrid \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "13", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Brinkmann:2025:HTM, author = "Andr{\'e} Brinkmann and Reza Salkhordeh and Florian Wiegert and Peng Wang and Yao Xin and Renhai Chen and Keji Huang and Gong Zhang", title = "{HLN-Tree}: a memory-efficient {B+}-Tree with huge leaf nodes and locality predictors", journal = j-TOS, volume = "21", number = "2", pages = "14:1--14:??", year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3707641", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 25 09:37:45 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3707641", abstract = "Key--value stores in Cloud environments can contain more than $ 2^{45} $ unique elements and be larger than 100 PByte. B$^+$-Trees are well suited for these larger-than-memory datasets and seamlessly index data stored on thousands of secondary storage devices. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "14", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Yang:2025:DCA, author = "Jin Yang and Heejin Yoon and Gyeongchan Yun and Sam H.
Noh and Young-Ri Choi", title = "A Dynamic Characteristic Aware Index Structure Optimized for Real-world Datasets", journal = j-TOS, volume = "21", number = "2", pages = "15:1--15:??", year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3707642", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 25 09:37:45 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3707642", abstract = "Many datasets in real life are complex and dynamic, that is, their key densities are varied over the whole key space and their key distributions change over time. It is challenging for an index structure to efficiently support all key operations for data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "15", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Liu:2025:HOL, author = "Gang Liu and Zheng Xiao and Kenli Li and Rujia Wang", title = "{HM-ORAM}: a Lightweight Crash-consistent {ORAM} Framework on Hybrid Memory System", journal = j-TOS, volume = "21", number = "2", pages = "16:1--16:??", year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715009", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 25 09:37:45 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3715009", abstract = "Byte-addressable non-volatile memory (NVM) is a promising alternative technology for main memory, allowing the processor to access persistent data in the main memory directly. Systems with emerging NVM as the main memory still suffer from information \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Storage", articleno = "16", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Liang:2025:ZBW, author = "Yuhong Liang and Yingjia Wang and Tsun-Yu Yang and Matias Bj{\o}rling and Ming-Chang Yang", title = "{ZonesDB}: Building Write-Optimized and Space-Adaptive Key--Value Store on Zoned Storage with Fragmented {LSM} Tree", journal = j-TOS, volume = "21", number = "2", pages = "17:1--17:??", year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715331", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 25 09:37:45 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3715331", abstract = "The zoned storage has revolutionized the decades-old block storage in lowering the cost-per-gigabyte while enabling the host system to achieve better performance. With such benefit of cost and performance, we still require careful consideration on the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "17", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wang:2025:SHP, author = "Rui Wang and Weixu Zong and Shuibing He and Yongkun Li and Yinlong Xu", title = "Scalable and High-Performance Large-Scale Dynamic Graph Storage and Processing System", journal = j-TOS, volume = "21", number = "2", pages = "18:1--18:??", year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715332", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Mar 25 09:37:45 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", URL = "https://dl.acm.org/doi/10.1145/3715332", abstract = "Existing in-memory graph storage systems that rely on DRAM have scalability issues because of the limited capacity and volatile nature of DRAM. 
The emerging persistent memory (PMEM) offers us a chance to solve these issues through its larger capacity and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Storage", articleno = "18", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Kuenning:2025:PPFb, author = "Geoff Kuenning and Youjip Won and Ming Zhao and Erez Zadok", title = "The Past, Present, and Future of Storage Technologies (part 2 of 2)", journal = j-TOS, volume = "21", number = "3", pages = "19:1--19:3", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744914", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Sep 23 07:07:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", note = "See also part 1 \cite{Kuenning:2025:PPFa}.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Sensintaffar:2025:AAD, author = "Alex Sensintaffar and Yixun Wei and Li Ou and David Du and Bingzhe Li", title = "Advancing Archival Data Storage: The Promises and Challenges of {DNA} Storage System", journal = j-TOS, volume = "21", number = "3", pages = "20:1--20:34", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3723166", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Sep 23 07:07:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "As the volume of data is rapidly produced every day, there is a need for the storage media to keep up with the growth rate of digital data created. 
Despite emerging storage solutions that have been proposed such as Solid State Drive with quad-level cells \ldots{}", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{George:2025:LUE, author = "Anjus George and Andreas Dilger and Michael J. Brim and Richard Mohr and Amir Shehata and Jong Youl Choi and Ahmad Maroof Karimi and Jesse Hanley and James Simmons and Dominic Manno and Veronica Melesse Vergara and Sarp Oral and Christopher Zimmer", title = "{Lustre} Unveiled: Evolution, Design, Advancements, and Current Trends", journal = j-TOS, volume = "21", number = "3", pages = "21:1--21:109", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3736583", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Sep 23 07:07:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The Lustre filesystem serves as a vital element in high-performance parallel storage, meeting the rising demands of scientific, research, and enterprise environments. 
Widely deployed across HPC environments, ranging from small-scale applications in AI/ML, \ldots{}", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Khan:2025:RPY, author = "Babar Khan and Andreas Koch", title = "Reflecting on the Past 17 Years of Shingled Magnetic Recording for Insights Into Future Disk Transitions: a Survey", journal = j-TOS, volume = "21", number = "3", pages = "22:1--22:50", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3731453", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Sep 23 07:07:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Shingled magnetic recording (SMR) is a data storage recording technology used in modern hard disk drives (HDDs) to increase the areal density capacity (ADC) of underlying media. The research on SMR drives began around 2008, with the first SMR disk \ldots{}", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Karim:2025:NDS, author = "Sajad Karim and Johannes W{\"u}nsche and Michael Kuhn and Gunter Saake and David Broneske", title = "{NVM} in Data Storage: a Post-{Optane} Future", journal = j-TOS, volume = "21", number = "3", pages = "23:1--23:85", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3731454", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Sep 23 07:07:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The dynamic evolution of non-volatile memory (NVM) technologies from Read-Only Memory (ROM) to flash memory, and recent innovations in Magnetoresistive RAM (MRAM), Phase Change Memory (PCM), and Resistive RAM (ReRAM) signify a pivotal shift in data \ldots{}", acknowledgement = ack-nhfb, articleno = "23", 
fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Boukhobza:2025:SFM, author = "Jalil Boukhobza and Pierre Olivier and Wen Sheng Lim and Liang-Chi Chen and Yun-Shan Hsieh and Shin-Ting Wu and Chien-Chung Ho and Po-Chun Huang and Yuan-Hao Chang", title = "A Survey on Flash-Memory Storage Systems: a Host-Side Perspective", journal = j-TOS, volume = "21", number = "3", pages = "24:1--24:59", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3723167", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Tue Sep 23 07:07:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "NAND flash memory has become the dominant storage media choice in a vast majority of application scenarios. Compared to mechanical hard disks, flash offers better access performance, energy efficiency, and shock resistance. However, the unique hardware \ldots{}", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Gavrilovska:2025:ISS, author = "Ada Gavrilovska and Douglas Terry", title = "Introduction to the Special Section on {USENIX OSDI 2024}", journal = j-TOS, volume = "21", number = "4", pages = "25:1--25:2", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3773914", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{McAllister:2025:FSC, author = "Sara McAllister and Yucong Wang and Benjamin Berg and Daniel S. Berger and Nathan Beckmann and George Amvrosiadis and Gregory R. 
Ganger", title = "{FairyWREN}: a Sustainable Cache for Emerging Write-Read-Erase Flash Interfaces", journal = j-TOS, volume = "21", number = "4", pages = "26:1--26:34", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3718390", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Leblanc:2025:SUR, author = "Hayley Leblanc and Nathan Taylor and James Bornholt and Vijay Chidambaram", title = "{SquirrelFS}: Using the {Rust} Compiler to Check File-System Crash Consistency", journal = j-TOS, volume = "21", number = "4", pages = "27:1--27:39", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3769109", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/rust.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhao:2025:ISS, author = "Ming Zhao and Benjamin Reed", title = "Introduction to the Special Section on {MSST 2024}", journal = j-TOS, volume = "21", number = "4", pages = "28:1--28:2", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3772725", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhu:2025:SLS, author = "Weidong Zhu and Grant Hernandez and Washington Garcia and Tian, Dave (Jing) and Sara Rampazzi 
and Kevin R. B. Butler", title = "{SrFTL}: Leveraging Storage Semantics for Effective Ransomware Defense in Flash-based {SSDs}", journal = j-TOS, volume = "21", number = "4", pages = "29:1--29:42", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3767322", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Jia:2025:HPS, author = "Wenqing Jia and Dejun Jiang and Jin Xiong", title = "A High-Performance and Scalable Userspace Log-Structured File System for Modern {SSDs}", journal = j-TOS, volume = "21", number = "4", pages = "30:1--30:41", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3728645", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhang:2025:EDC, author = "Yucheng Zhang and Wenbin Zeng and Hong Jiang and Dan Feng and Zichen Xu and Shuibing He and Mingzhe Zhang and Dan Wu", title = "An Efficient Delta Compression Framework Seamlessly Integrated into Inline Deduplication", journal = j-TOS, volume = "21", number = "4", pages = "31:1--31:30", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3721485", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Park:2025:TTE, author = "Jaeyong Park 
and Sangun Choi and Jongmin Kim and GunJae Koo and Myung Kuk Yoon and Yunho Oh", title = "{TM-Training}: an Energy-Efficient Tiered Memory System for Deep Learning Training in {NPUs}", journal = j-TOS, volume = "21", number = "4", pages = "32:1--32:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3721484", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zou:2025:ARV, author = "Qiang Zou and Bo Mao and Suzhen Wu and Yujuan Tan and Donghong Qin", title = "Analyzing Request Volatility of {I/O} Temporal Behaviors in Mobile Storage Workloads", journal = j-TOS, volume = "21", number = "4", pages = "33:1--33:31", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3722117", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Shao:2025:TAJ, author = "Xinyang Shao and Yiduo Wang and Cheng Li and Hengyu Liang and Chenhan Wang and Feng Yan and Yinlong Xu", title = "Towards Agile and Judicious Metadata Load Balancing for {Ceph} File System via Matrix-based Modeling", journal = j-TOS, volume = "21", number = "4", pages = "34:1--34:30", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3721483", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Storage", journal-URL = 
"https://dl.acm.org/loi/tos", } @Article{Koo:2025:MIL, author = "Kyoungho Koo and Junhan Lee", title = "Maintaining Inter-Layer Equilibrium in Hierarchical-Storage-based {KV} Store", journal = j-TOS, volume = "21", number = "4", pages = "35:1--35:35", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3722224", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wang:2025:SUP, author = "Keyu Wang and Huailiang Tan and Keqin Li", title = "Simplicity as the Ultimate Principle: The Art of Garbage Collection Management in {SSDs} Inspired by Natural Data Behavior", journal = j-TOS, volume = "21", number = "4", pages = "36:1--36:34", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3725219", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Su:2025:HHR, author = "Liangkuan Su and Mingwei Lin and Bo Mao and Jianpeng Zhang and Zeshui Xu", title = "{HaParallel}: Hit Ratio-Aware Parallel Aggressive Eviction Cache Management Algorithm for {SSDs}", journal = j-TOS, volume = "21", number = "4", pages = "37:1--37:25", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3728644", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Storage", journal-URL = 
"https://dl.acm.org/loi/tos", } @Article{Li:2025:BBT, author = "Tengfei Li and Minghao Yin and Juncheng Hu", title = "{BLA}: {Byzantine}-Tolerant Lazy Auditing Framework for Decentralized Storage Data Integrity", journal = j-TOS, volume = "21", number = "4", pages = "38:1--38:22", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3731542", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Levi:2025:PVL, author = "Asaf Levi and Philip Shilane and Sarai Sheinvald and Gala Yadgar", title = "Physical vs. Logical Indexing with {IDEA}: Inverted Deduplication-Aware Index", journal = j-TOS, volume = "21", number = "4", pages = "39:1--39:28", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3729426", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Sun:2025:MTB, author = "Hui Sun and Yinhui Chen and Yonwei Yu and Yajie Deng and Yinliang Yue and Song Jiang and Xiao Qin", title = "{MTree}: a Tiering-based Key--Value Store Powered by High-performance Hierarchical Data Management", journal = j-TOS, volume = "21", number = "4", pages = "40:1--40:40", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3736587", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Storage", 
journal-URL = "https://dl.acm.org/loi/tos", } @Article{Han:2025:DMF, author = "Daegyu Han and Jaeyoon Nam and Hokeun Cha and Changdae Kim and Kwangwon Koh and Taehoon Kim and Sang-Hoon Kim and Beomseok Nam", title = "Disaggregated Memory for File-backed Pages", journal = j-TOS, volume = "21", number = "4", pages = "41:1--41:27", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3736585", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Thu Dec 18 09:33:56 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Sun:2026:JJO, author = "Hui Sun and Jiaming Huang and Bo Chen and Yinliang Yue and Xiao Qin", title = "{JMStore}: Joint Optimization of Computation and Storage Balancing in Multi-{NDP} Key--Value Stores with Hash-Based Data Distribution", journal = j-TOS, volume = "22", number = "1", pages = "1:1--1:49", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3744568", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 2 08:42:48 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "It is challenging to store and process massive unstructured data for key-value storage systems that require high concurrency, high performance, and low latency. 
Log-Structured Merge (LSM) trees-based Key-value stores or KV stores are widely adopted for \ldots{}", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Lee:2026:PED, author = "Dusol Lee and Inhyuk Choi and Chanyoung Lee and Hyungsoo Jung and Jihong Kim", title = "{P2Cache}: Enhancing Data-Centric Applications via Application-Guided Management of {OS} Page Caches", journal = j-TOS, volume = "22", number = "1", pages = "2:1--2:33", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3736586", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 2 08:42:48 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Data-centric applications perform tasks that require intensive data processing and ample memory resources. These tasks have varying I/O access patterns, significantly impacted by the OS cache. Therefore, it is desirable to enable application-specific \ldots{}", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Xu:2026:APE, author = "Han Xu and Xiangyu Zou and Yunsheng Dong and Philip Shilane and Yanqi Pan and Cai Deng and Wen Xia", title = "{Argus}: a Precise and Efficient Resemblance Detection for Post-Deduplication Delta Compression", journal = j-TOS, volume = "22", number = "1", pages = "3:1--3:29", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3747839", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 2 08:42:48 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "For data reduction techniques used in storage systems, delta compression is often implemented after deduplication, having been shown to achieve a much higher compression ratio by efficiently detecting and compressing similar 
data chunks. Unfortunately, \ldots{}", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Chen:2026:GCM, author = "Hung-Yi Chen and Jin-Wei Chang and Hong-Ruei Lin and Li-Pin Chang", title = "Graceful {CNN} Model Degradation in Uncorrected Flash Storage for Embedded Edge Devices", journal = j-TOS, volume = "22", number = "1", pages = "4:1--4:25", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3747298", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 2 08:42:48 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Computing near the source of data has been proven effective in terms of energy conservation, latency improvement, and privacy preservation. With this, edge intelligence refers to local CNN inference in embedded edge devices. Because edge devices are \ldots{}", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Cai:2026:ABP, author = "Miao Cai and Junru Shen and Baoliu Ye", title = "Achieving Both Performance and Reliability in An Asymmetric File System on Disaggregated Persistent Memory", journal = j-TOS, volume = "22", number = "1", pages = "5:1--5:39", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3760403", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 2 08:42:48 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "The ultra-fast persistent memories (PMs) promise a practical solution toward high-performance distributed file systems. 
This article examines and reveals a cascade of performance and reliability issues in the current PM provision scheme, which not only \ldots{}", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Wang:2026:TTP, author = "Li Wang and Shi Qiu and Jianqin Yan and Zhirong Shen and Qingbo Wu and Xin Yao and Meiling Wang and Renhai Chen and Yiming Zhang", title = "A Tale of Two Paths: Optimizing Paravirtualized Storage {I/O} with {eBPF}", journal = j-TOS, volume = "22", number = "1", pages = "6:1--6:24", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3760404", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 2 08:42:48 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "KVM is the dominant VM hypervisor on Linux, and relies on QEMU to realize the backends of the virtio family of devices such as virtio-blk. However, KVM/QEMU-based paravirtualization prolongs the guest I/O path with multiple context switches. As fast NVMe \ldots{}", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Dou:2026:IRD, author = "Xinglei Dou and Lei Liu and Limin Xiao", title = "Is Intelligence the Right Direction in New {OS} Scheduling for Multiple Resources in Cloud Environments?", journal = j-TOS, volume = "22", number = "1", pages = "7:1--7:29", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3736584", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 2 08:42:48 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Making it intelligent is a promising way in System/OS design. This article proposes OSML+, a new ML-based resource scheduling mechanism for co-located cloud services. 
OSML+ intelligently schedules the cache and main memory bandwidth resources at the \ldots{}", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Zhou:2026:PUP, author = "Shuyue Zhou and Ronglong Wu and Hao Li and Zhenggang Lin and Chengshuo Zheng and Zhirong Shen and Yijie Zhong and Fulin Nan and Yiming Zhang and Jiwu Shu", title = "From In-Place Updates to Out-of-Place Selections: Reconsidering Write Disturbance in Non-Volatile Memory", journal = j-TOS, volume = "22", number = "1", pages = "8:1--8:34", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3767319", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 2 08:42:48 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Non-volatile memory (NVM) opens up new opportunities to resolve scaling restrictions of main memory, yet it is still hindered by the write disturbance (WD) problem. The WD problem mistakenly transforms the values of NVM cells, hence seriously \ldots{}", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Pang:2026:SDG, author = "Lu Pang and Krishna Kant", title = "Synthetic Data Generation for Storage Trace Augmentation", journal = j-TOS, volume = "22", number = "1", pages = "9:1--9:29", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3767317", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 2 08:42:48 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Due to the increasingly data-intensive nature of the applications, the storage system performance continues to increase in importance and is often substantially responsible for the overall processing rate of the application. 
Fortunately, the storage \ldots{}", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", } @Article{Li:2026:HPF, author = "Xiaolu Li and Han Yuan and Xuan Liu and Junlong Zhang and Patrick P. C. Lee and Yuchong Hu and Dan Feng", title = "Harnessing Parallelism for Fast Data Repair in {MSR}-Coded Storage", journal = j-TOS, volume = "22", number = "1", pages = "10:1--10:38", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3773990", ISSN = "1553-3077 (print), 1553-3093 (electronic)", ISSN-L = "1553-3077", bibdate = "Mon Feb 2 08:42:48 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib", abstract = "Minimum-storage regenerating (MSR) codes are provably optimal erasure codes that minimize the repair bandwidth (i.e., the amount of traffic being transferred during a repair operation), while minimizing storage redundancy, in distributed storage systems. \ldots{}", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Storage", journal-URL = "https://dl.acm.org/loi/tos", }