%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "1.85", %%% date = "05 March 2026", %%% time = "11:26:48 MDT", %%% filename = "tocs.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% URL = "https://www.math.utah.edu/~beebe", %%% checksum = "07454 23066 119273 1200473", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "bibliography, BibTeX, ACM Transactions on %%% Computer Systems", %%% license = "public domain", %%% supported = "no", %%% docstring = "This is a COMPLETE BibTeX bibliography for %%% the journal ACM Transactions on Computer %%% Systems (CODEN ACSYEC, ISSN 0734-2071 %%% (print), 1557-7333 (electronic)), covering %%% all journal issues from 1983 -- date. %%% Publication began with volume 1, number 1, in %%% 1983. The journal appears quarterly, in %%% February, May, August, and November. %%% %%% The journal has a World-Wide Web site at: %%% %%% http://www.acm.org/pubs/tocs %%% %%% Tables-of-contents are available at: %%% %%% http://www.acm.org/pubs/contents/journals/tocs/ %%% http://portal.acm.org/browse_dl.cfm?idx=J774 %%% %%% There is currently coverage of all volumes, %%% except 1 and 2 (1983--1984), at that site. %%% %%% Qualified subscribers can retrieve the full %%% text of recent articles in PDF form. 
%%% %%% At version 1.85, the COMPLETE journal %%% coverage looked like this: %%% %%% 1983 ( 24) 1998 ( 11) 2013 ( 12) %%% 1984 ( 22) 1999 ( 9) 2014 ( 9) %%% 1985 ( 15) 2000 ( 12) 2015 ( 13) %%% 1986 ( 15) 2001 ( 13) 2016 ( 12) %%% 1987 ( 18) 2002 ( 12) 2017 ( 13) %%% 1988 ( 18) 2003 ( 13) 2018 ( 3) %%% 1989 ( 13) 2004 ( 10) 2019 ( 9) %%% 1990 ( 14) 2005 ( 13) 2020 ( 6) %%% 1991 ( 15) 2006 ( 12) 2021 ( 19) %%% 1992 ( 13) 2007 ( 11) 2022 ( 2) %%% 1993 ( 13) 2008 ( 10) 2023 ( 5) %%% 1994 ( 11) 2009 ( 8) 2024 ( 9) %%% 1995 ( 12) 2010 ( 9) 2025 ( 15) %%% 1996 ( 14) 2011 ( 12) 2026 ( 7) %%% 1997 ( 16) 2012 ( 15) %%% %%% Article: 536 %%% Proceedings: 1 %%% %%% Total entries: 537 %%% %%% The initial draft was extracted from the %%% ACM Web site, with manual corrections and %%% additions from bibliographies in the TeX %%% User Group collection, the author's %%% personal bibliography files, the Compendex %%% database, and a very large computer science %%% bibliography collection on ftp.ira.uka.de %%% in /pub/bibliography to which many people %%% have contributed. Where multiple %%% sources of a particular entry existed, %%% field values have been manually merged to %%% preserve maximal information. Missing %%% entries were identified by software %%% developed for the TeX User Group and BibNet %%% bibliography archive projects, and were %%% then supplied from the original journal %%% issues. Questions arising from conflicting %%% data were resolved by consulting the %%% original journal issues. %%% %%% ACM copyrights explicitly permit abstracting %%% with credit, so article abstracts, keywords, %%% and subject classifications have been %%% included in this bibliography wherever %%% available. Article reviews have been %%% omitted, until their copyright status has %%% been clarified. 
%%%
%%%                        The bibsource keys in the bibliography
%%%                        entries below indicate the data sources,
%%%                        usually the Karlsruhe computer science
%%%                        bibliography archive for the first two
%%%                        volumes, or the journal Web site or the
%%%                        Compendex database, both of which lack
%%%                        coverage of this journal before 1985.
%%%
%%%                        URL keys in the bibliography point to
%%%                        World Wide Web locations of additional
%%%                        information about the entry.
%%%
%%%                        Spelling has been verified with the UNIX
%%%                        spell and GNU ispell programs using the
%%%                        exception dictionary stored in the
%%%                        companion file with extension .sok.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed for the
%%%                        BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order, using ``bibsort -byvolume.''
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters. This is produced by Robert
%%%                        Solovay's checksum utility.",
%%%  }
%%% ====================================================================
%%% TeX fallback definitions, emitted ahead of the formatted bibliography:
%%% each macro below is defined only when the document has not defined it.
@Preamble{
    "\input bibnames.sty"
  # "\ifx \undefined \circled \def \circled #1{(#1)}\fi"
  # "\ifx \undefined \reg \def \reg {\circled{R}}\fi"
  # "\ifx \undefined \TM \def \TM {${}^{\sc TM}$} \fi"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:
@String{ack-nhfb = "Nelson H. F.
Beebe, University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    e-mail: \path|beebe@math.utah.edu|,
                    \path|beebe@acm.org|,
                    \path|beebe@computer.org| (Internet),
                    URL: \path|https://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:
@String{j-TOCS = "ACM Transactions on Computer Systems"}

%%% ====================================================================
%%% Publisher abbreviations:
@String{pub-ACM = "ACM Press"}
@String{pub-ACM:adr = "New York, NY 10036, USA"}

%%% ====================================================================
%%% Bibliography entries:

%%% Volume 1, number 1 (February 1983)
@Article{Jones:1983:EI,
  author =       "Anita K. Jones",
  title =        "{Editor}'s Introduction",
  journal =      j-TOCS,
  volume =       "1",
  number =       "1",
  pages =        "1--2",
  month =        feb,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}

@Article{Reed:1983:IAA,
  author =       "David P. Reed",
  title =        "Implementing Atomic Actions on Decentralized Data",
  journal =      j-TOCS,
  volume =       "1",
  number =       "1",
  pages =        "3--23",
  month =        feb,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}

@Article{Clark:1983:CPV,
  author =       "Douglas W.
Clark",
  key =          "Clark",
  title =        "Cache Performance in the {VAX-11\slash 780}",
  journal =      j-TOCS,
  volume =       "1",
  number =       "1",
  pages =        "24--37",
  month =        feb,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Oct 12 13:58:27 1984",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Math/sparse.linear.systems.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Os/storage.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "The performance of memory caches is usually studied
                  through trace-driven simulation. This approach has
                  several drawbacks. Notably, it excludes realistic
                  multiprogramming, operating system, and I/O activity.
                  In this paper, cache performance is studied by direct
                  measurement of the hardware. A hardware monitor was
                  attached to a VAX-11/780 computer, whose cache was
                  then measured during normal use. A reproducible
                  synthetic timesharing workload was also run. This
                  paper reports measurements including the hit ratios
                  of data and instruction references, the rate of cache
                  invalidations by I/O, and the amount of waiting time
                  due to cache misses. Additional measurements were
                  made with half the cache disabled, and with the
                  entire cache disabled.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}

@Article{Shamir:1983:GCS,
  author =       "Adi Shamir",
  title =        "On the Generation of Cryptographically Strong
                  Pseudorandom Sequences",
  journal =      j-TOCS,
  volume =       "1",
  number =       "1",
  pages =        "38--44",
  month =        feb,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}

@Article{Cox:1983:ICP,
  author =       "George W.
Cox and William M. Corwin and Konrad K. Lai and Fred J. Pollack",
  title =        "Interprocess Communication and Processor Dispatching
                  on the {Intel 432}",
  journal =      j-TOCS,
  volume =       "1",
  number =       "1",
  pages =        "45--66",
  month =        feb,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Tue Sep 9 09:46:02 1986",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Os/os.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Parallel/Multi.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "A unified facility for interprocess communication and
                  processor dispatching on the Intel 432 is described.
                  The facility is based on a queuing and binding
                  mechanism called a port. The goals and motivations
                  for ports, both abstract and implementation views of
                  them, and their absolute and comparative performance
                  are described.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
  owner =        "seufert",
}

@Article{Sauer:1983:CAS,
  author =       "Charles H. Sauer",
  title =        "Computational Algorithms for State-Dependent Queueing
                  Networks",
  journal =      j-TOCS,
  volume =       "1",
  number =       "1",
  pages =        "67--92",
  month =        feb,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Distributed/QLD/1983.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  note =         "See corrigendum \cite{Sauer:1983:CCA}.",
  acknowledgement = ack-nhfb,
  annote =       "\ldots{} in this paper the author limits the material
                  reviewed to three forms of state dependency in
                  queueing networks which have the product form.
The major part of the paper addresses state-dependent routing, in
                  which the probability of entering a queue of a
                  subnetwork depends upon the quotient of a linear
                  function of the number of customers in that queue and
                  another linear function of the total number of
                  customers in the subnetwork \ldots{}",
  country =      "USA",
  date =         "28/09/84",
  descriptors =  "Queueing network; method; state dependent queueing;
                  MVA; CONVOLUTION ALGORITHM",
  enum =         "2690",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
  language =     "English",
  location =     "RWTH-AC-DFV: TELL",
  references =   "26",
  revision =     "21/04/91",
}

@Article{Anonymous:1983:IA,
  author =       "Anonymous",
  title =        "Information for Authors",
  journal =      j-TOCS,
  volume =       "1",
  number =       "1",
  pages =        "93--95",
  month =        feb,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

%%% Volume 1, number 2 (May 1983)
@Article{Schwetman:1983:PSI,
  author =       "Herbert D. Schwetman",
  title =        "Preface to the Special Issue",
  journal =      j-TOCS,
  volume =       "1",
  number =       "2",
  pages =        "97--98",
  month =        may,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Eager:1983:PBH,
  author =       "Derek L. Eager and Kenneth C.
Sevcik",
  title =        "Performance Bound Hierarchies for Queueing Networks",
  journal =      j-TOCS,
  volume =       "1",
  number =       "2",
  pages =        "99--115",
  month =        may,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Agrawal:1983:ASM,
  author =       "Subhash C. Agrawal and Jeffrey P. Buzen",
  title =        "The Aggregate Server Method for Analyzing
                  Serialization Delays in Computer Systems",
  journal =      j-TOCS,
  volume =       "1",
  number =       "2",
  pages =        "116--143",
  month =        may,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Distributed/QLD/1982.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Os/IMMD_IV.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  annote =       "An approximate, iterative method is presented to
                  estimate the delays caused by programs waiting to
                  enter critical sections and other software control
                  structures in which mutual exclusion is enforced
                  (i.e., one-at-a-time or serialized processing). Some
                  common sources of such serialization delays include
                  routines that perform resource allocation, modify
                  internal data structures, or update external files
                  and databases \ldots{}",
  country =      "USA",
  date =         "02/12/83",
  descriptors =  "Queueing approximation; process management; aggregate
                  server method; serialization; resource allocation",
  enum =         "38",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  language =     "English",
  location =     "RWTH-AC-DFV: Bibl.",
  references =   "0",
  revision =     "19/03/92",
}

@Article{Chandy:1983:DDD,
  author =       "K. Mani Chandy and Laura M.
Haas and Jayadev Misra",
  title =        "Distributed Deadlock Detection",
  journal =      j-TOCS,
  volume =       "1",
  number =       "2",
  pages =        "144--156",
  month =        may,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Distributed/QLD/1983.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Misc/Discrete.event.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  annote =       "Distributed deadlock models are presented for resource
                  and communication deadlocks. Simple distributed
                  algorithms for detection of these deadlocks are
                  given. We show that all true deadlocks are detected
                  and that no false deadlocks are reported. In our
                  algorithms, no process maintains global information;
                  all messages have an identical short length. The
                  algorithms can be applied in distributed database and
                  other message communication systems.",
  country =      "USA",
  date =         "00/00/00",
  descriptors =  "DISTRIBUTED SIMULATION; COMPUTER NETWORK; DEADLOCK",
  enum =         "8087",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
  language =     "English",
  location =     "UniS-IND-DS_C",
  references =   "15",
  revision =     "19/10/93",
  xxnote =       "Check author order??",
}

@Article{Cappello:1983:VLP,
  author =       "Peter R. Cappello and Kenneth Steiglitz",
  title =        "A {VLSI} Layout for a Pipelined {Dadda} Multiplier",
  journal =      j-TOCS,
  volume =       "1",
  number =       "2",
  pages =        "157--174",
  month =        may,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Math/computer.arithmetic.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  note =         "Reprinted in E. E. Swartzlander, {\em Computer
                  Arithmetic}, Vol.
2, IEEE Computer Society Press Tutorial, Los Alamitos, CA, 1990.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "multiplication",
}

@Article{Blum:1983:HES,
  author =       "Manuel Blum",
  title =        "How to Exchange (Secret) Keys",
  journal =      j-TOCS,
  volume =       "1",
  number =       "2",
  pages =        "175--193",
  month =        may,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Theory/crypto.security.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  note =         "Previously published in ACM STOC '83 proceedings,
                  pages 440--447.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

%%% Volume 1, number 3 (August 1983)
@Article{Hoshino:1983:PPM,
  author =       "Tsutomu Hoshino and Toshio Kawai and Tomonori
                  Shirakawa and Junichi Higashino and Akira Yamaoka and
                  Hachidai Ito and Takashi Sato and Kazuo Sawada",
  title =        "{PACS}: a Parallel Microprocessor Array for Scientific
                  Calculations",
  journal =      j-TOCS,
  volume =       "1",
  number =       "3",
  pages =        "195--221",
  month =        aug,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Parallel/ovr.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Schlichting:1983:FSP,
  author =       "Richard D. Schlichting and Fred B.
Schneider",
  title =        "Fail-Stop Processors: An Approach to Designing
                  Fault-Tolerant Computing Systems",
  journal =      j-TOCS,
  volume =       "1",
  number =       "3",
  pages =        "222--238",
  month =        aug,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Distributed/distfs.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/SE/dependability.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}

@Article{Akl:1983:CSP,
  author =       "Selim G. Akl and Peter D. Taylor",
  title =        "Cryptographic Solution to a Problem of Access Control
                  in a Hierarchy",
  journal =      j-TOCS,
  volume =       "1",
  number =       "3",
  pages =        "239--248",
  month =        aug,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Bauer:1983:KDP,
  author =       "R. K. Bauer and T. A. Berson and R. J. Feiertag",
  title =        "A Key Distribution Protocol Using Event Markers",
  journal =      j-TOCS,
  volume =       "1",
  number =       "3",
  pages =        "249--255",
  month =        aug,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Misc/misc.1.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Kemmerer:1983:SRM,
  author =       "Richard A.
Kemmerer",
  title =        "Shared Resource Matrix Methodology: An Approach to
                  Identifying Storage and Timing Channels",
  journal =      j-TOCS,
  volume =       "1",
  number =       "3",
  pages =        "256--277",
  month =        aug,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

%%% Volume 1, number 4 (November 1983)
@Article{Jones:1983:PSI,
  author =       "Anita K. Jones",
  title =        "Preface to Special Issue",
  journal =      j-TOCS,
  volume =       "1",
  number =       "4",
  pages =        "279--280",
  month =        nov,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Strecker:1983:TBC,
  author =       "William D. Strecker",
  title =        "Transient Behavior of Cache Memories",
  journal =      j-TOCS,
  volume =       "1",
  number =       "4",
  pages =        "281--293",
  month =        nov,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Kobayashi:1983:ORC,
  author =       "Hiroshi Kobayashi and Mario Gerla",
  title =        "Optimal Routing in Closed Queueing Networks",
  journal =      j-TOCS,
  volume =       "1",
  number =       "4",
  pages =        "294--310",
  month =        nov,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Distributed/QLD/1983.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  annote =       "\ldots{} This paper addresses the problem of obtaining
                  the set of routing probabilities that will minimize
response time, or alternatively maximize the throughput. An
                  algorithm, called the flow deviation (FD) algorithm,
                  is already known for obtaining the optimal routing
                  probabilities for open queueing network models
                  \ldots{}",
  country =      "USA",
  date =         "28/11/84",
  descriptors =  "Closed queueing network; routing algorithm",
  enum =         "1726",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  language =     "English",
  location =     "RWTH-AC-DFV: Bibl.",
  references =   "0",
  revision =     "21/04/91",
}

@Article{Sloan:1983:MEB,
  author =       "Lansing Sloan",
  title =        "Mechanisms that Enforce Bounds on Packet Lifetimes",
  journal =      j-TOCS,
  volume =       "1",
  number =       "4",
  pages =        "311--330",
  month =        nov,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Shankar:1983:HPS,
  author =       "A. Udaya Shankar and Simon S. Lam",
  title =        "An {HDLC} Protocol Specification and Its Verification
                  Using Image Protocols",
  journal =      j-TOCS,
  volume =       "1",
  number =       "4",
  pages =        "331--368",
  month =        nov,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Sauer:1983:CCA,
  author =       "Charles H.
Sauer",
  title =        "Corrigendum: Computational Algorithms for
                  State-Dependent Queuing Networks",
  journal =      j-TOCS,
  volume =       "1",
  number =       "4",
  pages =        "369--369",
  month =        nov,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Distributed/QLD/1983.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  note =         "See \cite{Sauer:1983:CAS}.",
  acknowledgement = ack-nhfb,
  country =      "USA",
  date =         "13/05/93",
  descriptors =  "Queueing network; product form; analysis",
  enum =         "7840",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  language =     "English",
  location =     "SEL: Wi",
  references =   "0",
  revision =     "16/01/94",
}

@Article{Anonymous:1984:I,
  author =       "Anonymous",
  title =        "Index",
  journal =      j-TOCS,
  volume =       "1",
  number =       "4",
  pages =        "370--371",
  month =        nov,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

%%% Volume 2, number 1 (February 1984)
@Article{Jones:1984:PSI,
  author =       "Anita K. Jones",
  title =        "Preface to Special Issue",
  journal =      j-TOCS,
  volume =       "2",
  number =       "1",
  pages =        "1--1",
  month =        feb,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Schroeder:1984:EGG,
  author =       "Michael D. Schroeder and Andrew D. Birrell and Roger M.
Needham",
  title =        "Experience with {Grapevine}: The Growth of a
                  Distributed System",
  journal =      j-TOCS,
  volume =       "2",
  number =       "1",
  pages =        "3--23",
  month =        feb,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 15 14:49:51 1987",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Database/Wiederhold/1984.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Distributed/Danzig.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Os/os.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Grapevine is a distributed, replicated system that
                  provides message delivery, naming, authentication,
                  resource location, and access control services in an
                  internet of computers. The system, described in a
                  previous paper [1], was designed and implemented
                  several years ago. We now have had operational
                  experience with the system under substantial load. In
                  this paper we report on what we have learned from
                  using Grapevine.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "Design; experimentation; Grapevine; reliability; TOCS
                  operating systems distributed systems database
                  systems",
  owner =        "manning",
}

@Article{Lindsay:1984:CCR,
  author =       "Bruce G. Lindsay and Laura M. Haas and C. Mohan and
                  Paul F. Wilms and Robert A. Yost",
  title =        "Computation and Communication in {R*}: a Distributed
                  Database Manager",
  journal =      j-TOCS,
  volume =       "2",
  number =       "1",
  pages =        "24--38",
  month =        feb,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Database/Wiederhold/1984.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  note =         "Also published in/as: SOSP 9, Bretton Woods, Oct.
1983.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "Rstar; TOCS",
}

@Article{Birrell:1984:IRP,
  author =       "Andrew D. Birrell and Bruce Jay Nelson",
  key =          "Birrell \& Nelson",
  title =        "Implementing Remote Procedure Calls",
  journal =      j-TOCS,
  volume =       "2",
  number =       "1",
  pages =        "39--59",
  month =        feb,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 15 14:59:58 1987",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Compiler/bcp.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Distributed/networks.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Misc/misc.1.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Os/IMMD_IV.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Os/os.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/SE/dependability.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Remote procedure calls (RPC) appear to be a useful
                  paradigm for providing communication across a network
                  between programs written in a high-level language.
                  This paper describes a package providing a remote
                  procedure call facility, the options that face the
                  designer of such a package, and the decisions we
                  made. We describe the overall structure of our RPC
                  mechanism, our facilities for binding RPC clients,
                  the transport level, communication protocol, and some
                  performance measurements.
We include descriptions of some optimizations used to achieve high
                  performance and to minimize the load on server
                  machines that have many clients.",
  acknowledgement = ack-nhfb,
  checked =      "yes",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "Design; distributed naming and binding;
                  experimentation; inter-process communication;
                  performance; performance of communication protocols;
                  remote procedure calls; RPC, Cedar; RPC, transport
                  layer protocol; security; TOCS; transport layer
                  protocols",
  memos =        "The idea of RPC was first suggested by J. E. White in
                  a paper entitled ``A high-level framework for
                  network-based resource sharing'' in the Proceedings
                  of the National Computer Conference in June 1976. The
                  implementation of RPC described in the paper is the
                  one from the {\em Cedar\/} project at Xerox.",
  owner =        "manning",
}

@Article{Berkovich:1984:CCT,
  author =       "Simon Y. Berkovich and Colleen Roe Wilson",
  title =        "A Computer Communication Technique Using
                  Content-Induced Transaction Overlap",
  journal =      j-TOCS,
  volume =       "2",
  number =       "1",
  pages =        "60--77",
  month =        feb,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}

@Article{Kameda:1984:OCP,
  author =       "Hisao Kameda",
  title =        "Optimality of a Central Processor Scheduling Policy
                  for Processing a Job Stream",
  journal =      j-TOCS,
  volume =       "2",
  number =       "1",
  pages =        "78--90",
  month =        feb,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Os/IMMD_IV.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement =
ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}

%%% Volume 2, number 2 (May 1984)
@Article{Smith:1984:PSI,
  author =       "Alan Jay Smith",
  title =        "Preface to Special Issue",
  journal =      j-TOCS,
  volume =       "2",
  number =       "2",
  pages =        "91--92",
  month =        may,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Marsan:1984:CGS,
  author =       "Marco Ajmone Marsan and Gianni Conte and Gianfranco
                  Balbo",
  title =        "A Class of Generalized Stochastic {Petri} Nets for the
                  Performance Evaluation of Multiprocessor Systems",
  journal =      j-TOCS,
  volume =       "2",
  number =       "2",
  pages =        "93--122",
  month =        may,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Misc/Discrete.event.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Os/IMMD_IV.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/SE/uni-do.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "GSPN",
}

@Article{Tantawi:1984:PAC,
  author =       "Asser N. Tantawi and Manfred Ruschitzka",
  title =        "Performance Analysis of Checkpointing Strategies",
  journal =      j-TOCS,
  volume =       "2",
  number =       "2",
  pages =        "123--144",
  month =        may,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Schneider:1984:BGA,
  author =       "Fred B.
Schneider",
  title =        "{Byzantine} Generals in Action: Implementing Fail-Stop
                  Processors",
  journal =      j-TOCS,
  volume =       "2",
  number =       "2",
  pages =        "145--154",
  month =        may,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Distributed/distfs.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Os/IMMD_IV.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/SE/dependability.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}

@Article{Stamos:1984:SGS,
  author =       "James W. Stamos",
  title =        "Static Grouping of Small Objects to Enhance
                  Performance of a Paged Virtual Memory",
  journal =      j-TOCS,
  volume =       "2",
  number =       "2",
  pages =        "155--180",
  month =        may,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Compiler/gc.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                  ftp://ftp.ira.uka.de/pub/bibliography/Parallel/distmem.bib;
                  https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Smalltalk is an object-oriented, interactive
                  programming environment that maintains state between
                  user sessions. Because of the persistence of objects,
                  it is possible to use program restructuring
                  techniques to statically relocate objects in virtual
                  memory. Grouping related objects on the same disk
                  page increases locality of reference, reduces the
                  number of page faults, and improves performance.
Five types of static grouping algorithms along with the static analysis performed on their outputs, and empirical evidence of their performance are presented.", acknowledgement = ack-nhfb, comment = "Using the garbage collector to improve performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "clustering TOCS", } @Article{McKusick:1984:FFS, author = "Marshall K. McKusick and William N. Joy and Sam J. Leffler and Robert S. Fabry", key = "McKusick et al.", title = "A Fast File System for {UNIX}", journal = j-TOCS, volume = "2", number = "3", pages = "181--197", month = aug, year = "1984", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Feb 7 10:11:41 1985", bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib; ftp://ftp.ira.uka.de/pub/bibliography/Os/unix.1.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "A reimplementation of the UNIX file system is described. The reimplementation provides substantially higher throughput rates by using more flexible allocation policies that allow better locality of reference and can be adapted to a wide range of peripheral and processor characteristics. The new file system clusters data that is sequentially accessed and provides two block sizes to allow fast access to large files while not wasting large amounts of space for small files. File access rates of up to ten times faster than the traditional UNIX file system are experienced. Long-needed enhancements to the programmers' interface are discussed. 
These include a mechanism to place advisory locks on files, extensions of the name space across file systems, the ability to use long file names, and provisions for administrative control of resource usage.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "TOCS; UNIX, file system organization, file system performance, file system design, application program interface", } @Article{Landwehr:1984:SMM, author = "Carl E. Landwehr and Constance L. Heitmeyer and John McLean", title = "A Security Model for Military Message Systems", journal = j-TOCS, volume = "2", number = "3", pages = "198--222", month = aug, year = "1984", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 11:57:59 1999", bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Database/Wiederhold/1984.bib; ftp://ftp.ira.uka.de/pub/bibliography/Misc/bibsec.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Schwarz:1984:SSA, author = "Peter M. Schwarz and Alfred Z. Spector", title = "Synchronizing Shared Abstract Types", journal = j-TOCS, volume = "2", number = "3", pages = "223--250", month = aug, year = "1984", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 11:18:40 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Chang:1984:RBP, author = "Jo-Mei Chang and N. F. 
Maxemchuk", title = "Reliable Broadcast Protocols", journal = j-TOCS, volume = "2", number = "3", pages = "251--273", month = aug, year = "1984", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 11:57:59 1999", bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Compiler/gc.bib; ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib; ftp://ftp.ira.uka.de/pub/bibliography/Misc/misc.1.bib; ftp://ftp.ira.uka.de/pub/bibliography/SE/dependability.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, comment = "Atomic multicast protocol.", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "Consensus / Broadcast; Multicast; TOCS operating distributed systems reliability networks communication", } @Article{Anonymous:1984:IA, author = "Anonymous", title = "Information for Authors", journal = j-TOCS, volume = "2", number = "3", pages = "274--276", month = aug, year = "1984", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 11:18:40 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Saltzer:1984:EEA, author = "J. H. Saltzer and D. P. Reed and D. D. 
Clark", key = "Saltzer et al.", title = "End-to-End Arguments in System Design", journal = j-TOCS, volume = "2", number = "4", pages = "277--288", month = nov, year = "1984", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Mar 6 11:12:06 1985", bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib; ftp://ftp.ira.uka.de/pub/bibliography/Misc/digital.library.bib; ftp://ftp.ira.uka.de/pub/bibliography/Os/os.bib; ftp://ftp.ira.uka.de/pub/bibliography/SE/dependability.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "This paper presents a design principle that helps guide placement of functions among the modules of a distributed computer system. The principle, called the end-to-end argument, suggests that functions placed at low levels of a system may be redundant or of little value when compared with the cost of providing them at that low level. Examples discussed in the paper include bit-error recovery, security using encryption, duplicate message suppression, recovery from system crashes, and delivery acknowledgement. Low-level mechanisms to support these functions are justified only as performance enhancements.", acknowledgement = ack-nhfb, comments = "Argues that you should put functionality at the higher app layers, rather than at low layers. Includes a security example", entered-by = "Andreas Paepcke", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "data communication; design; design principles; protocol design; TOCS", } @Article{Smith:1984:DAE, author = "James E. 
Smith", title = "Decoupled Access\slash Execute Computer Architectures", journal = j-TOCS, volume = "2", number = "4", pages = "289--308", month = nov, year = "1984", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 11:57:59 1999", bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib; ftp://ftp.ira.uka.de/pub/bibliography/Math/sparse.linear.systems.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "TOCS", } @Article{Tichy:1984:SSC, author = "Walter F. Tichy", title = "The String-to-String Correction Problem with Block Moves", journal = j-TOCS, volume = "2", number = "4", pages = "309--321", month = nov, year = "1984", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 11:57:59 1999", bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib; ftp://ftp.ira.uka.de/pub/bibliography/Misc/allison.bib; ftp://ftp.ira.uka.de/pub/bibliography/Misc/protein.pattern.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Uses block moves as the edit operation, seeks min' number, gets a $O(n)$ linear algorithm if P. Weiner's data-structure used. $S$ source string, $T$ target string. Algorithm: find longest prefix of $T$ that is a substring of $S$, this gives the first block move; repeat until done. Proof: by induction on number of block moves. One block move - $T$ must obviously be a substring of $S$, alg' finds this. Suppose optimal is $i$ block moves and alg' fails, i.e., finds $j > i$ moves. T:----Opt1----$|$--Opt2--$|$---Opt3---$|$.....$|$--Algi-- T:-----Alg1-----$|$---Alg2---$|$.....$|$-----Algj------ NB. $|$Alg1$|$ $>$= $|$Opt1$|$ Delete the substring Alg1. 
By induction the alg' would find the opt' explanation of the rest of $T$ - but it doesn't :- contradiction.", acknowledgement = ack-nhfb, comment = "``An algorithm that produces the shortest edit sequence transforming one string into another is presented. The algorithm is optimal in the sense that it generated a minimal covering set of common substrings of one string with respect to another. Two improvements of the basic algorithm are developed. The first improvement performs well on strings with few replicated symbols. The second improvement runs in time and space linear to the size of the input. Efficient algorithms for regenerating a string from an edit sequence are also presented.'' longest common sequence", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "longest common subsequence, LCS, LCSS, edit distance, block, move, TOCS, string to strings, sequence, alignment, linear, algorithm; TOCS", } @Article{Rom:1984:OSC, author = "Raphael Rom", title = "Ordering Subscribers on Cable Networks", journal = j-TOCS, volume = "2", number = "4", pages = "322--334", month = nov, year = "1984", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 11:57:59 1999", bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "TOCS", } @Article{Bryant:1984:MPA, author = "Raymond M. Bryant and Anthony E. Krzesinski and M. Seetha Lakshmi and K. 
Mani Chandy", title = "The {MVA} Priority Approximation", journal = j-TOCS, volume = "2", number = "4", pages = "335--359", month = nov, year = "1984", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 11:57:59 1999", bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib; ftp://ftp.ira.uka.de/pub/bibliography/Os/IMMD_IV.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "TOCS", } @Article{Birrell:1985:SCU, author = "Andrew D. Birrell", title = "Secure Communication Using Remote Procedure Calls", journal = j-TOCS, volume = "3", number = "1", pages = "1--14", month = feb, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-1/p1-birrell/", abstract = "Research on encryption-based secure communication protocols has reached a stage where it is feasible to construct end-to-end secure protocols. The design of such a protocol, built as part of a remote procedure call package, is described. The security abstraction presented to users of the package, the authentication mechanisms, and the protocol for encrypting and verifying remote calls are also described.", acknowledgement = ack-nhfb, affiliationaddress = "Xerox Corp", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer networks; cryptography; design; experimentation; Protocols; remote procedure calls; secure communication; security", subject = "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Cryptographic controls. 
{\bf C.2.0} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, General, Security and protection (e.g., firewalls). {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol architecture.", } @Article{Skeen:1985:DLP, author = "Dale Skeen", title = "Determining the Last Process to Fail", journal = j-TOCS, volume = "3", number = "1", pages = "15--30", month = feb, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-1/p15-skeen/", abstract = "A total failure occurs whenever all processes cooperatively executing a distributed task fail before the task completes. A frequent prerequisite for recovery from a total failure is identification of the last set (LAST) of processes to fail. Necessary and sufficient conditions are derived here for computing LAST from the local failure data of recovered processes. These conditions are then translated into procedures for deciding LAST membership, using either complete or incomplete failure data. The choice of failure data is itself dictated by two requirements: (1) it can be cheaply maintained, and (2) it must afford maximum fault-tolerance in the sense that the expected number of recoveries required for identifying LAST is minimized.", acknowledgement = ack-nhfb, affiliationaddress = "Cornell Univ, Ithaca, NY, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; computer systems, digital; cooperative processes; database systems --- Distributed; Distributed; event ordering; reliability; total failure", subject = "{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. 
{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Reliability, availability, and serviceability. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Checkpoint/restart. {\bf H.2.2} Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart.", } @Article{Clark:1985:PVT, author = "Douglas W. Clark and Joel S. Emer", title = "Performance of the {VAX-11\slash 780} Translation Buffer: Simulation and Measurement", journal = j-TOCS, volume = "3", number = "1", pages = "31--62", month = feb, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-1/p31-clark/", abstract = "A virtual-address translation buffer (TB) is a hardware cache of recently used virtual-to-physical address mappings. The authors present the results of a set of measurements and simulations of translation buffer performance in the VAX-11\slash 780. Two different hardware monitors were attached to VAX-11\slash 780 computers, and translation buffer behavior was measured. Measurements were made under normal time-sharing use and while running reproducible synthetic time-sharing work loads. Reported measurements include the miss ratios of data and instruction references, the rate of TB invalidations due to context switches, and the amount of time taken to service TB misses. Additional hardware measurements were made with half the TB disabled. Trace-driven simulations of several programs were also run; the traces captured system activity as well as user-mode execution. 
Several variants of the 11\slash 780 TB structure were simulated.", acknowledgement = ack-nhfb, affiliationaddress = "Digital Equipment Corp, Littleton, MA, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "cache memories; computer simulation; computers, digital --- Performance; data storage units; design; experimentation; hardware monitor; measurement; performance; trace-driven simulation; translation buffer", subject = "{\bf C.1.1} Computer Systems Organization, PROCESSOR ARCHITECTURES, Single Data Stream Architectures, VAX. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Associative memories. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Virtual memory. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Simulation**.", } @Article{Chandy:1985:DSD, author = "K. Mani Chandy and Leslie Lamport", title = "Distributed Snapshots: Determining Global States of Distributed Systems", journal = j-TOCS, volume = "3", number = "1", pages = "63--75", month = feb, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-1/p63-chandy/", abstract = "This paper presents an algorithm by which a process in a distributed system determines a global state of the system during a computation. Many problems in distributed systems can be cast in terms of the problem of detecting global states. For instance, the global state detection algorithm helps to solve an important class of problems: stable property detection. 
A stable property is one that persists: once a stable property becomes true it remains true thereafter. Examples of stable properties are `computation has terminated', `the system is deadlocked' and `all tokens in a token ring have disappeared.' The stable property detection problem is that of devising algorithms to detect a given stable property. Global state detection can also be used for checkpointing.", acknowledgement = ack-nhfb, affiliationaddress = "Univ of Texas at Austin, Austin, TX, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; computer programming --- Algorithms; computer systems, digital; Distributed; distributed deadlock detection; distributed snapshots; global states", subject = "{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Deadlocks. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Multiprocessing/multiprogramming/multitasking. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Mutual exclusion. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Synchronization. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Backup procedures. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Checkpoint/restart. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Verification.", } @Article{Cheriton:1985:DPG, author = "David R. 
Cheriton and Willy Zwaenepoel", title = "Distributed Process Groups in the {V} Kernel", journal = j-TOCS, volume = "3", number = "2", pages = "77--107", month = may, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-2/p77-cheriton/", abstract = "The V kernel supports an abstraction of processes, with operations for interprocess communication, process management, and memory management. This abstraction is used as a software base for constructing distributed systems. As a distributed kernel, the V kernel makes intermachine boundaries largely transparent. In this environment of many cooperating processes on different machines, there are many logical groups of processes. In this paper we describe the extension of the V kernel to support process groups. Operations on groups include group interprocess communication. Aspects of the implementation and performance, and initial experience with applications are discussed.", acknowledgement = ack-nhfb, affiliationaddress = "Stanford Univ, Stanford, CA, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; computer operating systems; computer systems, digital --- Distributed; design; distributed process groups; measurement; performance; V kernel", subject = "{\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design.", } @Article{Even:1985:PCC, author = "S. Even and O. 
Goldreich", title = "On the Power of Cascade Ciphers", journal = j-TOCS, volume = "3", number = "2", pages = "108--116", month = may, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-2/p108-even/", abstract = "The unicity distance of a cascade of random ciphers, with respect to known plaintext attack, is shown to be the sum of the key lengths. A time-space trade-off for the exhaustive cracking of a cascade of ciphers is shown. The structure of the set of permutations realized by a cascade is studied; it is shown that only $l \cdot 2^k$ exhaustive experiments are necessary to determine the behavior of a cascade of l stages, each having k key bits. It is concluded that the cascade of random ciphers is not a random cipher. Yet, it is shown that, with high probability, the number of permutations realizable by a cascade of l random ciphers, each having k key bits, is $2^{lk}$. Next, it is shown that two stages are not worse than one, by a simple reduction of the cracking problem of any of the stages to the cracking problem of the cascade. Finally, it is shown that proving a nonpolynomial lower bound on the cracking problem of long cascades is a hard task, since such a bound implies that P does not equal NP.", acknowledgement = ack-nhfb, affiliationaddress = "Technion-Israel Inst of Technology, Haifa, Isr", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; cascade ciphers; cryptography; data encryption; data processing --- Security of Data; random ciphers; security; theory; unicity distance", subject = "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection. 
{\bf E.3} Data, DATA ENCRYPTION.", } @Article{Padmanabhan:1985:PAR, author = "Krishnan Padmanabhan and Duncan H. Lawrie", title = "Performance Analysis of Redundant-Path Networks for Multiprocessor Systems", journal = j-TOCS, volume = "3", number = "2", pages = "117--144", month = may, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-2/p117-padmanabhan/", abstract = "Performance of a class of multistage interconnection networks employing redundant paths is investigated. Redundant path networks provide significant tolerance to faults at minimal costs; in this paper improvements in performance and very graceful degradation are also shown to result from the availability of redundant paths. A Markov model is introduced for the operation of these networks in the circuit-switched mode and is solved numerically to obtain the performance measures of interest. The structure of the networks that provide maximal performance is also characterized.", acknowledgement = ack-nhfb, affiliationaddress = "Univ of Illinois at Urbana-Champaign, Urbana, IL, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer systems, digital; design; Multiprocessing; multistage interconnection networks; performance; performance analysis; redundant-path networks", subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Performance attributes. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Interconnection architectures. 
{\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Multiple-instruction-stream, multiple-data-stream processors (MIMD). {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Parallel processors**. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Design studies. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Modeling techniques.", } @Article{Maekawa:1985:AME, author = "Mamoru Maekawa", title = "A {$\sqrt{N}$} Algorithm for Mutual Exclusion in Decentralized Systems", journal = j-TOCS, volume = "3", number = "2", pages = "145--159", month = may, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-2/p145-maekawa/", abstract = "An algorithm is presented that uses only $c \sqrt{N}$ messages to create mutual exclusion in a computer network, where N is the number of nodes and c a constant between 3 and 5. The algorithm is symmetric and allows fully parallel operation.", acknowledgement = ack-nhfb, affiliationaddress = "Univ of Tokyo, Dep of Information Science, Tokyo, Jpn", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; computer networks; computer programming --- Algorithms; decentralized systems; design; mutual exclusion; performance", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Mutual exclusion. {\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Network communications. 
{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems.", } @Article{Smith:1985:DCM, author = "Alan Jay Smith", title = "Disk Cache --- Miss Ratio Analysis and Design Considerations", journal = j-TOCS, volume = "3", number = "3", pages = "161--203", month = aug, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-3/p161-smith/", abstract = "The current trend of computer system technology is toward CPUs with rapidly increasing processing power and toward disk drives of rapidly increasing density, but with disk performance increasing very slowly if at all. The implication of these trends is that at some point the processing power of computer systems will be limited by the throughput of the input\slash output (I/O) system. A solution to this problem, which is described and evaluated in this paper, is disk cache. The idea is to buffer recently used portions of the disk address space in electronic storage. Experimental results are based on extensive trace-driven simulations using traces taken from three large IBM or IBM-compatible mainframe data processing installations. 
We find that disk cache is a powerful means of extending the performance limits of high-end computer systems.", acknowledgement = ack-nhfb, affiliation = "Univ of California, Dep of Electrical Engineering \& Computer Sciences, Berkeley, CA, USA", affiliationaddress = "Univ of California, Dep of Electrical Engineering \& Computer Sciences, Berkeley, CA, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "cache controller; computer systems, digital; data storage units; design; disk cache; experimentation; I/O buffer; measurement; performance", subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories. {\bf B.4.2} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS, Input/Output Devices, Channels and controllers. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Mass storage. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management.", } @Article{Strom:1985:ORD, author = "Robert E. Strom and Shaula Yemini", title = "Optimistic Recovery in Distributed Systems", journal = j-TOCS, volume = "3", number = "3", pages = "204--226", month = aug, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-3/p204-strom/", abstract = "Optimistic Recovery is a new technique supporting application-independent transparent recovery from processor failures in distributed systems. In optimistic recovery communication, computation and checkpointing proceed asynchronously. 
Synchronization is replaced by causal dependency tracking, which enables a posteriori reconstruction of a consistent distributed system state following a failure using process rollback and message replay. Because there is no synchronization among computation, communication, and checkpointing, optimistic recovery can tolerate the failure of an arbitrary number of processors and yields better throughput and response time than other general recovery techniques whenever failures are infrequent.", acknowledgement = ack-nhfb, affiliationaddress = "IBM, Thomas J. Watson Research Cent, Yorktown Heights, NY, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; computer operating systems; computer programming --- Algorithms; computer systems, digital; Distributed; optimistic algorithms; optimistic recovery; reliability; verification", subject = "{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors). {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming.", } @Article{Tay:1985:EBP, author = "Y. C. 
Tay and Rajan Suri", title = "Error Bounds for Performance Prediction in Queuing Networks", journal = j-TOCS, volume = "3", number = "3", pages = "227--254", month = aug, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-3/p227-tay/", abstract = "Analytic models based on closed queuing networks (CQNS) are widely used for performance prediction in practical systems. In using such models, there is always a prediction error, that is, a difference between the predicted performance and actual outcome. This prediction error is due both to modeling errors and estimation errors, the latter being the difference between the estimated values of the CQN parameters and the actual outcomes. This paper considers the second class of errors; in particular, it studies the effect of small estimation errors and provides bounds on prediction errors based on bounds on estimation errors. Estimation errors may be divided into two types: (1) the difference between the estimated value and the average value of the outcome, and (2) the deviation of the actual value from its average. The analysis first studies the sum of both types of errors, then the second type alone. 
The results are illustrated with three examples.", acknowledgement = ack-nhfb, affiliationaddress = "Natl Univ of Singapore, Dep of Mathematics, Kent Ridge, Singapore", classification = "722; 723; 922", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "closed queuing networks; computer systems, digital; error bounds; measurement; performance; Performance; probability --- Queueing Theory; product form networks; queuing networks; verification", subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Modeling techniques. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Stochastic analysis. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Queueing theory. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Modeling and prediction.", } @Article{Brown:1985:AFS, author = "Mark R. Brown and Karen N. Kolling and Edward A. Taft", title = "The {Alpine} File System", journal = j-TOCS, volume = "3", number = "4", pages = "261--293", month = nov, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-4/p261-brown/", abstract = "Alpine is a file system that supports atomic transactions and is designed to operate as a service on a computer network. Alpine's primary purpose is to store files that represent databases. An important secondary goal is to store ordinary files representing documents, program modules, and the like. Unlike other file servers described in the literature, Alpine uses a log-based technique to implement atomic file update. Another unusual aspect of Alpine is that it performs all communication via a general-purpose remote procedure call facility. 
Both of these decisions have worked out well. This paper describes Alpine's design and implementation, and evaluates the system in light of our experience to date. The Cedar language and programming environment is used to develop Alpine.", acknowledgement = ack-nhfb, affiliationaddress = "Xerox Corp, USA", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "alpine; cedar; computer networks; computer programming languages; database systems; design; experimentation; file servers; file system; reliability", subject = "{\bf D.4.0} Software, OPERATING SYSTEMS, General, Alpine. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Checkpoint/restart. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management. {\bf D.3.2} Software, PROGRAMMING LANGUAGES, Language Classifications, Cedar. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Backup procedures. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Distributed databases.", } @Article{DeMori:1985:RAB, author = "Renato {De Mori} and R{\'e}gis Cardin", title = "A Recursive Algorithm for Binary Multiplication and its Implementation", journal = j-TOCS, volume = "3", number = "4", pages = "294--314", month = nov, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-4/p294-de_mori/", abstract = "A new recursive algorithm for deriving the layout of parallel multipliers is presented. Based on this algorithm, a network for performing multiplications of two's complement numbers is proposed. 
The network can be implemented in a synchronous or an asynchronous way. If the factors to be multiplied have N bits, the area complexity of the network is O(N**2) for practical values of N as in the case of cellular multipliers. Due to the design approach based on a recursive algorithm, a time complexity O(log N) is achieved. It is shown how the structure can be pipelined with period complexity O(1) and used for single and double precision multiplication.", acknowledgement = ack-nhfb, affiliationaddress = "Concordia Univ, Dep of Computer Science, Montreal, Que, Can", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; Algorithms; binary multiplication; complexity; computer programming; computer systems, digital --- Parallel Processing; design; performance; recursive algorithm", subject = "{\bf B.2.1} Hardware, ARITHMETIC AND LOGIC STRUCTURES, Design Styles, Parallel. {\bf B.2.1} Hardware, ARITHMETIC AND LOGIC STRUCTURES, Design Styles, Pipeline. {\bf C.5.4} Computer Systems Organization, COMPUTER SYSTEM IMPLEMENTATION, VLSI Systems.", } @Article{Chow:1985:DCM, author = "Ching-Hua Chow and Mohamed G. Gouda and Simon S. Lam", title = "A Discipline for Constructing Multiphase Communication Protocols", journal = j-TOCS, volume = "3", number = "4", pages = "315--343", month = nov, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-4/p315-chow/", abstract = "Many communication protocols can be observed to go through different phases performing a distinct function in each phase. A multiphase model for such protocols is presented. 
A phase is formally defined to be a network of communicating finite-state machines with certain desirable correctness properties; these include proper termination and freedom from deadlocks and unspecified receptions. A multifunction protocol is constructed by first constructing separate phases to perform its different functions. It is shown how to connect these phases together to realize the multifunction protocol so that the resulting network of communicating finite-state machines is also a phase (i.e., it possesses the desirable properties defined for phases). The modularity inherent in multiphase protocols facilitates not only their construction but also their understanding and modification. An abundance of protocols have been found in the literature that can be constructed as multiphase protocols. Three examples are presented here: two versions of IBM's BSC protocol for data link control and a token ring network protocol.", acknowledgement = ack-nhfb, affiliationaddress = "Univ of Texas at Austin, Dep of Computer Sciences, Austin, TX, USA", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; BSC protocols; computer networks; design; multiphase communication protocols; Protocols; theory; token ring network protocol; verification", subject = "{\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol architecture. {\bf B.4.4} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS, Performance Analysis and Design Aids**, Formal models**. {\bf B.4.4} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS, Performance Analysis and Design Aids**, Verification**. {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol verification. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming. {\bf D.2.2} Software, SOFTWARE ENGINEERING, Design Tools and Techniques, Modules and interfaces.
{\bf D.2.2} Software, SOFTWARE ENGINEERING, Design Tools and Techniques, Structured programming**. {\bf D.2.4} Software, SOFTWARE ENGINEERING, Software/Program Verification, Correctness proofs. {\bf D.2.4} Software, SOFTWARE ENGINEERING, Software/Program Verification, Validation.", } @Article{Suzuki:1985:DME, author = "Ichiro Suzuki and Tadao Kasami", title = "A Distributed Mutual Exclusion Algorithm", journal = j-TOCS, volume = "3", number = "4", pages = "344--349", month = nov, year = "1985", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-4/p344-suzuki/", abstract = "A distributed algorithm is presented that realizes mutual exclusion among N nodes in a computer network. The algorithm requires at most N message exchanges for one mutual exclusion invocation. Accordingly, the delay to invoke mutual exclusion is smaller than in an algorithm of Ricart and Agrawala, which requires 2*(N-1) message exchanges per invocation. A drawback of the algorithm is that the sequence numbers contained in the messages are unbounded. It is shown that this problem can be overcome by slightly increasing the number of message exchanges.", acknowledgement = ack-nhfb, affiliationaddress = "Osaka Univ, Toyonaka, Jpn", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "Algorithms; algorithms; computer networks; computer programming; distributed mutual exclusion algorithm; message exchange; process management", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Mutual exclusion. 
{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.", } @Article{Smith:1986:IGP, author = "Connie U. Smith", title = "Independent General Principles for Constructing Responsive Software Systems", journal = j-TOCS, volume = "4", number = "1", pages = "1--31", month = feb, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-1/p1-smith/", abstract = "Three general principles are presented that can be applied in early software life cycle stages for the definition of software requirements and designs with acceptable performance. They are genuine high-level considerations for meeting responsiveness goals without sacrificing understandability and maintainability, and without increasing development time and cost. The principles are derived from the interrelationships of two performance models: a queueing-network-based computer system model and an execution graph software model. The performance effect of each of the principles is quantified using the models. Examples are given that illustrate how they can be applied to software systems.", acknowledgement = ack-nhfb, affiliationaddress = "Duke Univ, Dep of Computer Science, Durham, NC, USA", classification = "722; 723; 921", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer software; computer systems, digital --- Performance; design; mathematical models; Performance; performance", subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Design studies. {\bf D.2.10} Software, SOFTWARE ENGINEERING, Design**. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Performance attributes.
{\bf D.0} Software, GENERAL. {\bf D.2.9} Software, SOFTWARE ENGINEERING, Management, Life cycle. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Modeling and prediction. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Operational analysis.", } @Article{Herlihy:1986:QCR, author = "Maurice Herlihy", title = "A Quorum-Consensus Replication Method for Abstract Data Types", journal = j-TOCS, volume = "4", number = "1", pages = "32--53", month = feb, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-1/p32-herlihy/", abstract = "Replication can enhance the availability of data in distributed systems. This paper introduces a new method for managing replicated data. Unlike many methods that support replication only for uninterpreted files, this method systematically exploits type-specific properties of objects such as sets, queues, or directories to provide more effective replication. Each operation requires the cooperation of a certain number of sites for its successful completion. A quorum for an operation is any such set of sites. Necessary and sufficient constraints on quorum intersections are derived from an analysis of the data type's algebraic structure. A reconfiguration method is proposed that permits quorums to be changed dynamically. 
By taking advantage of type-specific properties in a general and systematic way, this method can realize a wider range of availability properties and more flexible reconfiguration than comparable replication methods.", acknowledgement = ack-nhfb, affiliationaddress = "Carnegie-Mellon Univ, Computer Science Dep, Pittsburgh, PA, USA", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "abstract data types; algorithms; computer operating systems; computer programming languages; database systems; reliability; replication method; verification", subject = "{\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming. {\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Abstract data types. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Distributed databases. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Joseph:1986:LCM, author = "Thomas A. Joseph and Kenneth P. Birman", title = "Low Cost Management of Replicated Data in Fault-Tolerant Distributed Systems", journal = j-TOCS, volume = "4", number = "1", pages = "54--70", month = feb, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-1/p54-joseph/", abstract = "Many distributed systems replicate data for fault tolerance or availability. In such systems, a logical update on a data item results in a physical update on a number of copies. 
The synchronization and communication required to keep the copies of replicated data consistent introduce a delay when operations are performed. In this paper, we describe a technique that relaxes the usual degree of synchronization, permitting replicated data items to be updated concurrently with other operations, while at the same time ensuring that correctness is not violated. The additional concurrency thus obtained results in better response time when performing operations on replicated data. We also discuss how this technique performs in conjunction with a roll-back and a roll-forward failure recovery mechanism.", acknowledgement = ack-nhfb, affiliationaddress = "Cornell Univ, Dep of Computer Science, Ithaca, NY, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; computer systems, digital --- Distributed; database systems; fault-tolerant distributed systems; reliability; replicated data; roll-forward recovery; update", subject = "{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Synchronization. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Checkpoint/restart. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf H.2.2} Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart. 
{\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Kameda:1986:EJL, author = "Hisao Kameda", title = "Effects of Job Loading Policies for Multiprogramming Systems in Processing a Job Stream", journal = j-TOCS, volume = "4", number = "1", pages = "71--106", month = feb, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-1/p71-kameda/", abstract = "The scheduling of jobs for multiprogramming systems includes the selection of jobs to be loaded into memory (job loading policy or memory schedule) and the scheduling for CPU processing (CPU schedule). There has been a successful empirical claim for the optimal CPU schedule; its optimality has been proved in a Markovian model of job-stream processing that uses the first-come-first-loaded (FCFL) job loading policy. We extend this model to gain insight into the effects of job loading policies. Our investigation, supported by numerical calculations, suggests that much more care may be needed in implementing the job loading policy that aims at the optimal processing capacity than in implementing the optimal CPU schedule. 
This agrees with what has been conjectured on the basis of empirical studies.", acknowledgement = ack-nhfb, affiliationaddress = "Univ of Electro-Communications, Dep of Computer Science, Chofu, Jpn", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer operating systems; computer systems programming; finite memory size model; job loading policies; multiple-resource system; performance; theory; throughput", subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Modeling techniques. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Allocation/deallocation strategies. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Modeling and prediction. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Queueing theory. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Stochastic analysis. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Multiprocessing/multiprogramming/multitasking.", } @Article{Carriero:1986:NLK, author = "Nicholas Carriero and David Gelernter", title = "The {S/Net}'s {Linda} kernel", journal = j-TOCS, volume = "4", number = "2", pages = "110--129", month = may, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-2/p110-carriero/", abstract = "Linda is a parallel programming language that differs from other parallel languages in its simplicity and in its support for distributed data structures. The S/Net is a multicomputer, designed and built at AT\&T Bell Laboratories, that is based on a fast, word-parallel bus interconnect. 
We describe the Linda-supporting communication kernel we have implemented on the S/Net. The implementation suggests that Linda's unusual shared-memory-like communication primitives can be made to run well in the absence of physically shared memory; the simplicity of the language and of our implementation's logical structure suggest that similar Linda implementations might readily be constructed on related architectures. We outline the language, and programming methodologies based on distributed data structures; we then describe the implementation, and the performance both of the Linda primitives themselves and of a simple S/Net-Linda matrix-multiplication program designed to exercise them.", acknowledgement = ack-nhfb, affiliationaddress = "Yale Univ, New Haven, CT, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "communication kernel; computer programming languages; computer systems, digital --- Parallel Processing; data processing --- Data Structures; design; languages; Linda parallel programming languages; S/Net", subject = "{\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Concurrent programming structures. {\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Network communications. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Message sending.", } @Article{Kronenberg:1986:VCC, author = "Nancy P. Kronenberg and Henry M. Levy and William D.
Strecker", title = "{VAXclusters}: a Closely-Coupled Distributed System", journal = j-TOCS, volume = "4", number = "2", pages = "130--146", month = may, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-2/p130-kronenberg/", abstract = "A VAXcluster is a highly available and extensible configuration of VAX computers that operate as a single system. To achieve performance in a multicomputer environment, a new communications architecture, communications hardware, and distributed software were jointly designed. The software is a distributed version of the VAX\slash VMS operating system that uses a distributed lock manager to synchronize access to shared resources. The communications hardware includes a 70 megabit per second message-oriented interconnect and an interconnect port that performs communications tasks traditionally handled by software. Performance measurements show this structure to be highly efficient, for example, capable of sending and receiving 3000 messages per second on a VAX-11\slash 780.", acknowledgement = ack-nhfb, affiliationaddress = "Digital Equipment Corp, Littleton, MA, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer software; computer systems, digital; design; Distributed; intersystem communication protocols; network protocols; performance; reliability; VAXclusters", subject = "{\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management. {\bf C.2.5} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Local and Wide-Area Networks, Buses. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. 
{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance.", } @Article{Fitzgerald:1986:IVM, author = "Robert Fitzgerald and Richard F. Rashid", title = "The Integration of Virtual Memory Management and Interprocess Communication in {Accent}", journal = j-TOCS, volume = "4", number = "2", pages = "147--177", month = may, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-2/p147-fitzgerald/", abstract = "The integration of virtual memory management and interprocess communication in the Accent network operating system kernel is examined. The design and implementation of the Accent memory management system is discussed and its performance, both on a series of message-oriented bench-marks and in normal operation, is analyzed in detail.", acknowledgement = ack-nhfb, affiliationaddress = "Carnegie-Mellon Univ, Pittsburgh, PA, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "accent kernel; algorithms; computer operating systems; data transmission; design; interprocess communication; measurement; performance; Storage Allocation; virtual memory management", subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Virtual memory. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Message sending. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Operational analysis. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. 
{\bf B.1.5} Hardware, CONTROL STRUCTURES AND MICROPROGRAMMING, Microcode Applications, Firmware support of operating systems/instruction sets**.", } @Article{Hoyme:1986:TSM, author = "K. P. Hoyme and S. C. Bruell and P. V. Afshari and R. Y. Kain", title = "A Tree-Structured Mean Value Analysis Algorithm", journal = j-TOCS, volume = "4", number = "2", pages = "178--185", month = may, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-2/p178-hoyme/", abstract = "In a recent paper, S. S. Lam and Y. L. Lien described an algorithm called tree-convolution that can reduce the space and computation time required for evaluating sparse multiclass, product-form queueing networks. In this paper, we develop an exact algorithm based on mean value analysis (MVA) that is the counterpart of the tree-convolution algorithm. The order of reduction in storage and computation achieved by our new Tree-MVA algorithm compared to the standard MVA algorithm is the same order of reduction obtained by the tree-convolution algorithm over that of the standard convolution algorithm. Our Tree-MVA algorithm preserves the inherent simplicity of MVA based algorithms.", acknowledgement = ack-nhfb, affiliationaddress = "Honeywell Systems \& Research Cent, Minneapolis, MN, USA", classification = "723; 921", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "Algorithms; algorithms; computer programming; design; mathematical techniques --- Trees; mean value analysis algorithm; performance; tree-structured algorithm", subject = "{\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Operational analysis.
{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Modeling techniques. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Modeling and prediction. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Stochastic analysis.", } @Article{Barbara:1986:VVA, author = "Daniel Barbara and H{\'e}ctor Garc{\'\i}a-Molina", title = "The Vulnerability of Vote Assignments", journal = j-TOCS, volume = "4", number = "3", pages = "187--213", month = aug, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-3/p187-barbara/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; reliability", subject = "{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Reliability, availability, and serviceability. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf B.1.3} Hardware, CONTROL STRUCTURES AND MICROPROGRAMMING, Control Structure Reliability, Testing, and Fault-Tolerance**, Error-checking**.", } @Article{Iyer:1986:MMC, author = "R. K. Iyer and D. J. Rossetti and M. C. 
Hsueh", title = "Measurement and Modeling of Computer Reliability as Affected by System Activity", journal = j-TOCS, volume = "4", number = "3", pages = "214--237", month = aug, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-3/p214-iyer/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "measurement; performance; reliability", subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Measurement techniques. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Reliability, availability, and serviceability.", } @Article{Lazowska:1986:FAP, author = "Edward D. Lazowska and John Zahorjan and David R. Cheriton and Willy Zwaenepoel", title = "File Access Performance of Diskless Workstations", journal = j-TOCS, volume = "4", number = "3", pages = "238--268", month = aug, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-3/p238-lazowska/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; measurement; performance", subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Design studies. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Modeling and prediction. 
{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems.", } @Article{Archibald:1986:CCP, author = "James Archibald and Jean-Loup Baer", title = "Cache Coherence Protocols: Evaluation Using a Multiprocessor Simulation Model", journal = j-TOCS, volume = "4", number = "4", pages = "273--298", month = nov, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-4/p273-archibald/", abstract = "Using simulation, we examine the efficiency of several distributed, hardware-based solutions to the cache coherence problem in shared-bus multiprocessors. For each of the approaches, the associated protocol is outlined. The simulation model is described, and results from that model are presented. The magnitude of the potential performance difference between the various approaches indicates that the choice of coherence solution is very important in the design of an efficient shared-bus multiprocessor, since it may limit the number of processors in the system.", acknowledgement = ack-nhfb, affiliationaddress = "Univ of Washington, Seattle, WA, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "cache coherence protocols; computer simulation; computer systems, digital; design; measurement; Multiprocessing; performance; shared-bus multiprocessor", subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Multiple-instruction-stream, multiple-data-stream processors (MIMD). 
{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Measurement techniques. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Modeling techniques. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Distributed memories.", } @Article{Comer:1986:CBM, author = "Douglas E. Comer and Larry L. Peterson", title = "Conversation-Based Mail", journal = j-TOCS, volume = "4", number = "4", pages = "299--319", month = nov, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-4/p299-comer/", abstract = "A new message communication paradigm based on conversations that provides an alternative to memo- and conference-based mail is described. A conversation-based message system groups messages into conversations, and orders messages within a conversation according to the context in which they were written. The message context relation leads to an efficient implementation of conversations in a distributed environment and supports a natural ordering of messages when viewed by the user. Experience with a prototype demonstrates the workability of conversation-based mail and suggests that conversations provide a powerful tool for message communication.", acknowledgement = ack-nhfb, affiliationaddress = "Purdue Univ, West Lafayette, IN, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer systems, digital; conversation-based mail; design; Distributed; electronic mail; human factors; management; message systems", subject = "{\bf H.4.3} Information Systems, INFORMATION SYSTEMS APPLICATIONS, Communications Applications, Electronic mail.
{\bf H.4.3} Information Systems, INFORMATION SYSTEMS APPLICATIONS, Communications Applications, Computer conferencing, teleconferencing, and videoconferencing.", } @Article{Badal:1986:DDD, author = "D. Z. Badal", title = "The Distributed Deadlock Detection Algorithm", journal = j-TOCS, volume = "4", number = "4", pages = "320--337", month = nov, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-4/p320-badal/", abstract = "We propose a distributed deadlock detection algorithm for distributed computer systems. We consider two types of resources, depending on whether the remote resource lock granularity and mode can or cannot be determined without access to the remote resource site. We present the algorithm, its performance analysis, and an informal argument about its correctness. The proposed algorithm has a hierarchical design intended to detect the most frequent deadlocks with maximum efficiency.", acknowledgement = ack-nhfb, affiliationaddress = "Hewlett--Packard Lab, Palo Alto, CA, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; computer programming --- Algorithms; computer systems, digital; deadlock detection; design; Distributed; distributed algorithms; message communication systems; performance", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Deadlocks. {\bf H.2.2} Information Systems, DATABASE MANAGEMENT, Physical Design, Deadlock avoidance. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems.", } @Article{Carey:1986:PMC, author = "Michael J. Carey and Waleed A. 
Muhanna", title = "The Performance of Multiversion Concurrency Control Algorithms", journal = j-TOCS, volume = "4", number = "4", pages = "338--378", month = nov, year = "1986", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-4/p338-carey/", abstract = "This paper describes a simulation study of the performance of several multiversion concurrency control algorithms, investigating the extent to which they provide increases in the level of concurrency and also the CPU, I/O, and storage costs resulting from the use of multiple versions. The algorithms are compared with regard to performance with their single-version counterparts and with each other. It is shown that each algorithm offers significant performance improvements despite the additional disk accesses involved in accessing old versions of data; the nature of the improvement depends on the algorithm in question. It is also shown that the storage overhead for maintaining old versions that may be required by ongoing transactions is not all that large under most circumstances.", acknowledgement = ack-nhfb, affiliationaddress = "Univ of Wisconsin, Madison, WI, USA", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; computer programming --- Algorithms; database systems; deadlock avoidance; design; Distributed; experimentation; multiversion concurrency control algorithms; performance; transaction processing", subject = "{\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Simulation. {\bf H.2.2} Information Systems, DATABASE MANAGEMENT, Physical Design, Deadlock avoidance. 
{\bf H.2.2} Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Lamport:1987:FME, author = "Leslie Lamport", title = "A Fast Mutual Exclusion Algorithm", journal = j-TOCS, volume = "5", number = "1", pages = "1--11", month = feb, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-1/p1-lamport/", abstract = "A new solution to the mutual exclusion problem is presented that, in the absence of contention, requires only seven memory accesses. It assumes atomic reads and atomic writes to shared registers.", acknowledgement = ack-nhfb, affiliationaddress = "Digital Equipment Corp, Palo Alto, CA, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; computer programming --- Algorithms; computer systems, digital; memory accesses; Multiprocessing; mutual exclusion algorithm", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Mutual exclusion. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Deadlocks. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Shared memory.", } @Article{Cheriton:1987:UUS, author = "David R. 
Cheriton", title = "{UIO}: a {Uniform I/O} System Interface for Distributed Systems", journal = j-TOCS, volume = "5", number = "1", pages = "12--46", month = feb, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-1/p12-cheriton/", abstract = "The UIO (uniform I/O) system interface that has been used for the past five years in the V distributed operating system is described, with the focus on the key design issues. This interface provides several extensions beyond the I/O interface of UNIX, including support for record I/O, locking, atomic transactions, and replication, as well as attributes that indicate whether optional semantics and operations are available. Experience in using and implementing this interface with a variety of different I/O services is described, along with the performance of both local and network I/O. It is concluded that the UIO interface provides a uniform I/O system interface with significant functionality, wide applicability, and no significant performance penalty.", acknowledgement = ack-nhfb, affiliationaddress = "Stanford Univ, Stanford, CA, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer interfaces; computer operating systems; computer systems, digital --- Distributed; design; experimentation; files input/output; interprocess communication; performance; remote procedure call; standardization; uniform I/O interface", subject = "{\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Input/output. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems. 
{\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf C.2.0} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, General, Security and protection (e.g., firewalls).", } @Article{Birman:1987:RCP, author = "Kenneth P. Birman and Thomas A. Joseph", title = "Reliable Communication in the Presence of Failures", journal = j-TOCS, volume = "5", number = "1", pages = "47--76", month = feb, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-1/p47-birman/", abstract = "The design and correctness of a communication facility for a distributed computer system are reported on. The facility provides support for fault-tolerant process groups in the form of a family of reliable multicast protocols that can be used in both local- and wide-area networks. These protocols attain high levels of concurrency, while respecting application-specific delivery ordering constraints, and have varying cost and performance that depend on the degree of ordering desired. In particular, a protocol that enforces causal delivery orderings is introduced and shown to be a valuable alternative to conventional asynchronous communication protocols.
The facility also ensures that the processes belonging to a fault-tolerant process group will observe consistent orderings of events affecting the group as a whole.", acknowledgement = ack-nhfb, affiliationaddress = "Cornell Univ, Ithaca, NY, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer networks --- Protocols; computer systems, digital; Distributed; fault tolerance; multicast protocols; performance; reliability", subject = "{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Synchronization. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Distributed databases. {\bf H.2.2} Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Network communication. 
{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Network communications.", } @Article{Geist:1987:CDS, author = "Robert Geist and Stephen Daniel", title = "A Continuum of Disk Scheduling Algorithms", journal = j-TOCS, volume = "5", number = "1", pages = "77--92", month = feb, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-1/p77-geist/", abstract = "A continuum of disk scheduling algorithms, V(R), having endpoints V(0) equals SSTF and V(1) equals SCAN, is defined. V(R) maintains a current SCAN direction (in or out) and services next the request with the smallest effective distance. The effective distance of a request that lies in the current direction is its physical distance (in cylinders) from the read\slash write head. The effective distance of a request in the opposite direction is its physical distance plus R multiplied by (total number of cylinders on the disk). By use of simulation methods, it is shown that this definitional continuum also provides a continuum in performance, both with respect to the mean and with respect to the standard deviation of request waiting time. For objective functions that are linear combinations of the two measures, $\mu_w + k \sigma_w$, intermediate points of the continuum are seen to provide performance uniformly superior to both SSTF and SCAN.
A method of implementing V(R) and the results of its experimental use in a real system are presented.", acknowledgement = ack-nhfb, affiliationaddress = "Clemson Univ, Clemson, SC, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; computer operating systems; computer programming --- Algorithms; computer simulation; computer systems, digital; disk scheduling algorithms; measurement; moving-head disk; performance; Scheduling", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Input/output. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Modeling and prediction. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Simulation.", } @Article{Smith:1987:RDC, author = "Alan Jay Smith", title = "Remark on {``Disk Cache --- Miss Ratio Analysis and Design Consideration''}", journal = j-TOCS, volume = "5", number = "1", pages = "93--93", month = feb, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-1/p93-smith/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design", subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Secondary storage. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories.", } @Article{Watson:1987:GET, author = "Richard W. Watson and Sandy A. 
Mamrak", title = "Gaining Efficiency in Transport Services by Appropriate Design and Implementation Choices", journal = j-TOCS, volume = "5", number = "2", pages = "97--120", month = may, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-2/p97-watson/", abstract = "This paper examines transport protocol mechanisms and implementation issues and argues that general-purpose transport protocols can be effective in a wide range of distributed applications because (1) many of the mechanisms used in the special-purpose protocols can also be used in general-purpose protocol designs and implementations, (2) special-purpose designs have hidden costs, and (3) very special operating system environments, overall system loads, application response times, and interaction patterns are required before general-purpose protocols are the main system performance bottlenecks.", acknowledgement = ack-nhfb, affiliationaddress = "Lawrence Livermore Natl Lab, Livermore, CA, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer networks --- Protocols; computer systems, digital; design; Distributed; economics; interprocess communication; performance; standardization; transport layer protocols; transport services", subject = "{\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol architecture.", } @Article{Joyce:1987:MDS, author = "Jeffrey Joyce and Greg Lomow and Konrad Slind and Brian Unger", title = "Monitoring Distributed Systems", journal = j-TOCS, volume = "5", number = "2", pages = "121--150", month = may, year = "1987", CODEN = "ACSYEC", ISSN = 
"0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-2/p121-joyce/", abstract = "The monitoring of distributed systems involves the collection, interpretation, and display of information concerning the interactions among concurrently executing processes. This information and its display can support the debugging, testing, performance evaluation, and dynamic documentation of distributed systems. General problems associated with monitoring are outlined in this paper, and the architecture of a general purpose, extensible, distributed monitoring system is presented. Three approaches to the display of process interactions are described: textual traces, animated graphical traces, and a combination of aspects of the textual and graphical approaches. The roles that each of these approaches fulfills in monitoring and debugging distributed systems are identified and compared. Monitoring tools for collecting communication statistics, detecting deadlock, controlling the nondeterministic execution of distributed systems, and for using protocol specifications in monitoring are also described.", acknowledgement = ack-nhfb, affiliationaddress = "Univ of Calgary, Calgary, Alberta, Can", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer software --- Monitoring; computer systems, digital; concurrent monitoring; deadlock; design; Distributed; distributed monitoring; dynamic documentation; human factors; measurement; protocol specifications", subject = "{\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Monitors. {\bf D.2.2} Software, SOFTWARE ENGINEERING, Design Tools and Techniques, User interfaces. 
{\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing and Debugging. {\bf D.2.4} Software, SOFTWARE ENGINEERING, Software/Program Verification, Assertion checkers. {\bf D.2.7} Software, SOFTWARE ENGINEERING, Distribution, Maintenance, and Enhancement, Documentation. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf D.2.6} Software, SOFTWARE ENGINEERING, Programming Environments.", } @Article{Glasgow:1987:DPF, author = "Janice I. Glasgow and Glenn H. MacEwen", title = "The Development and Proof of a Formal Specification for a Multilevel Secure System", journal = j-TOCS, volume = "5", number = "2", pages = "151--184", month = may, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-2/p151-glasgow/", abstract = "This paper describes current work on the design and specification of a multilevel secure distributed system called SNet. It discusses security models in general, the various problems of information flows in SNet, and the abstract and concrete security model components for SNet. It also introduces Lucid as a language for specifying distributed systems. 
The model components are expressed in Lucid; these Lucid partial specifications are shown to be correct with respect to the formal model, and the two model components are shown to be consistent.", acknowledgement = ack-nhfb, affiliationaddress = "Queen's Univ, Kingston, Ont, Can", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer operating systems; computer programming languages; computer systems, digital; data processing --- Security of Data; Distributed; formal specification; lucid; multilevel secure system; security; SNet; verification", subject = "{\bf C.2.0} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, General, Security and protection (e.g., firewalls). {\bf D.3.2} Software, PROGRAMMING LANGUAGES, Language Classifications, LUCID. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Information flow controls. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, SNet.", } @Article{Schwan:1987:HPO, author = "Karsten Schwan and Tom Bihari and Bruce W. Weide and Gregor Taulbee", title = "High-Performance Operating System Primitives for Robotics and Real-Time Control Systems", journal = j-TOCS, volume = "5", number = "3", pages = "189--231", month = aug, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-3/p189-schwan/", abstract = "The Generalized Executive for real-time Multiprocessor applications (GEM) is an operating system that addresses several requirements of operating software. 
First, when using GEM, programmers can select one of two different types of tasks differing in size, called processes and microprocesses. Second, the scheduling calls offered by GEM permit the implementation of several models of task interaction. Third, GEM supports multiple models of communication with a parameterized communication mechanism. Fourth, GEM is closely coupled to prototype real-time programming environments that provide programming support for the models of computation offered by the operating system. GEM is being used on a multiprocessor with robotics application software of substantial size and complexity.", acknowledgement = ack-nhfb, affiliationaddress = "Ohio State Univ, Columbus, OH, USA", classification = "723; 731", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer operating systems; computer systems, digital --- Multiprocessing; control systems --- Computer Applications; generalized executive for real-time multiprocessor applications; high-performance operating system primitives; real-time control systems; robotics", subject = "{\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Real-time systems and embedded systems. {\bf C.3} Computer Systems Organization, SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS, Process control systems. {\bf C.3} Computer Systems Organization, SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS, Real-time and embedded systems. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Design studies. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Multiprocessing/multiprogramming/multitasking. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Message sending. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf J.7} Computer Applications, COMPUTERS IN OTHER SYSTEMS, Industrial control. 
{\bf J.7} Computer Applications, COMPUTERS IN OTHER SYSTEMS, Process control. {\bf J.7} Computer Applications, COMPUTERS IN OTHER SYSTEMS, Real time. {\bf D.4.0} Software, OPERATING SYSTEMS, General.", } @Article{Harter:1987:RTL, author = "Paul K. {Harter, Jr.}", title = "Response Times in Level-Structured Systems", journal = j-TOCS, volume = "5", number = "3", pages = "232--248", month = aug, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-3/p232-harter/", abstract = "Real-time programs are among the most critical programs in use today, yet they are also among the worst understood and the most difficult to verify. Validation of real-time systems is nonetheless extremely important in view of the high costs associated with failure in typical application areas. We present here a method for deriving response-time properties in complex systems with a level structure based on priority. The method involves a level-by-level examination of the system, in which information distilled from each successive level is used to adjust the results for later levels. 
The results obtained at each level of the system are not affected by later analyses, which obviates having to consider a complex system as a whole.", acknowledgement = ack-nhfb, affiliationaddress = "Univ of Colorado, Boulder, CO, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer software --- Reliability; computer systems, digital; design; level-structured systems; performance; Performance; real-time systems; reliability; response times; verification", subject = "{\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Modeling and prediction. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Hierarchical design**. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Real-time systems and embedded systems. {\bf J.7} Computer Applications, COMPUTERS IN OTHER SYSTEMS, Industrial control. {\bf J.7} Computer Applications, COMPUTERS IN OTHER SYSTEMS, Process control. {\bf J.7} Computer Applications, COMPUTERS IN OTHER SYSTEMS, Real time. {\bf D.2.4} Software, SOFTWARE ENGINEERING, Software/Program Verification, Validation.", } @Article{Herlihy:1987:CVA, author = "Maurice Herlihy", title = "Concurrency Versus Availability: Atomicity Mechanisms for Replicated Data", journal = j-TOCS, volume = "5", number = "3", pages = "249--274", month = aug, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-3/p249-herlihy/", abstract = "A replicated object is a typed data object that is stored redundantly at multiple locations to enhance availability.
Most techniques for managing replicated data have a two-level structure: At the higher level, a replica-control protocol reconstructs the object's state from its distributed components, and at the lower level, a standard concurrency-control protocol synchronizes accesses to the individual components. This paper explores an alternative approach to managing replicated data by presenting two replication methods in which concurrency control and replica management are handled by a single integrated protocol. These integrated protocols permit more concurrency than independent protocols, and they allow availability and concurrency to be traded off: Constraints on concurrency may be relaxed if constraints on availability are tightened, and vice versa. In general, constraints on concurrency and availability cannot be minimized simultaneously.", acknowledgement = ack-nhfb, affiliationaddress = "Carnegie-Mellon Univ, Pittsburgh, PA, USA", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "atomicity mechanisms; computer programming --- Algorithms; database systems; replicated data", subject = "{\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language Constructs and Features. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Distributed databases. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Concurrency.", } @Article{Kirkman:1987:OCP, author = "W. 
Worth Kirkman", title = "An Optimized Contention Protocol for Broadband Networks", journal = j-TOCS, volume = "5", number = "3", pages = "275--283", month = aug, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-3/p275-kirkman/", abstract = "This paper describes the concepts underlying an alternative link-level protocol for broadband local networks. The protocol uses implicit slotting of the contention channel to support larger networks, improve performance, and provide reliable distributed collision recognition without reinforcement. It is designed such that compatible interfaces to existing CSMA\slash CD-based systems can be provided.", acknowledgement = ack-nhfb, affiliationaddress = "MITRE Corp, McLean, VA, USA", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; broadband networks; carrier sense multiple access/collision detection network; computer networks; CSMA/CD-based systems; data transmission; local networks; optimized contention protocol; performance; Protocols", subject = "{\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols. {\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Packet-switching networks.", } @Article{Sanders:1987:ISD, author = "Beverly A. 
Sanders", title = "The Information Structure of Distributed Mutual Exclusion Algorithms", journal = j-TOCS, volume = "5", number = "3", pages = "284--299", month = aug, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-3/p284-sanders/", abstract = "The concept of an information structure is introduced as a unifying principle behind several of the numerous algorithms that have been proposed for the distributed mutual exclusion problem. This approach allows the development of a generalized mutual exclusion algorithm that accepts a particular information structure at initialization and realizes both known and new algorithms as special cases. Two simple performance metrics of a realized algorithm can be obtained directly from the information structure. A new failure recovery mechanism called local recovery, which requires no coordination between nodes and no additional messages beyond that needed for failure detection, is introduced.", acknowledgement = ack-nhfb, affiliationaddress = "Univ of Maryland, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; computer programming --- Algorithms; computer systems, digital; design; Distributed; distributed mutual exclusion algorithms; failure recovery; local recovery; performance; reliability; theory", subject = "{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Reliability, availability, and serviceability. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Mutual exclusion. 
{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance.", } @Article{Thiebaut:1987:FC, author = "Dominique Thiebaut and Harold S. Stone", title = "Footprints in the Cache", journal = j-TOCS, volume = "5", number = "4", pages = "305--329", month = nov, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-4/p305-thiebaut/", abstract = "This paper develops an analytical model for cache-reload transients and compares the model to observations based on several address traces. The cache-reload transient is the set of cache misses that occur when a process is reinitiated after being suspended temporarily. For example, an interrupt program that runs periodically experiences a reload transient at each initiation. The reload transient depends on the cache size and on the sizes of the footprints in the cache of the competing programs, where a program footprint is defined to be the set of lines in the cache in active use by the program. The model shows that the size of the transient is related to the normal distribution function. 
A simulation based on program-address traces shows excellent agreement between the model and the observations.", acknowledgement = ack-nhfb, affiliationaddress = "Univ of Massachusetts, MA, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "address traces; cache-reload transients; computer architecture; computer operating systems --- Storage Allocation; data storage units; design; experimentation; memory structures; performance; program footprint; theory; trace-driven simulation", subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Formal models**. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Simulation**. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Modeling techniques. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Multiprocessing/multiprogramming/multitasking. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Swapping**.", } @Article{Falcone:1987:PIL, author = "Joseph R. Falcone", title = "A Programmable Interface Language for Heterogeneous Distributed Systems", journal = j-TOCS, volume = "5", number = "4", pages = "330--351", month = nov, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-4/p330-falcone/", abstract = "The performance requirements of systems of personal-computer workstations places a strain on traditional approaches to network architecture. 
The integration of diverse systems into this environment introduces functional compatibility issues that are not present in homogeneous networks. This paper proposes a distributed system architecture in which communication follows a programming paradigm. In this architecture a programming language provides remote service interfaces for the heterogeneous distributed system environment. This language is a flexible and efficient medium for implementing service function protocols. In essence, clients and servers communicate by programming one another.", acknowledgement = ack-nhfb, affiliationaddress = "Digital Equipment Corp", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer interfaces; computer networks --- Local Networks; computer programming languages; computer systems, digital --- Distributed; computers, personal; heterogeneous distributed systems; personal computer workstation networks; programmable interface language", } @Article{Koch:1987:DFA, author = "Philip D. L. Koch", title = "Disk File Allocation Based on the Buddy System", journal = j-TOCS, volume = "5", number = "4", pages = "352--370", month = nov, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-4/p352-koch/", abstract = "A variant of the binary buddy system that reduces fragmentation is described. Files are allocated on up to t extents, and inoptimally allocated files are periodically reallocated. The Dartmouth Time-Sharing System (DTSS) uses this method. Several installations, representing different classes of workload are studied to measure the method's performance. 
The results indicate that compared to the file layout method used by UNIX, the buddy system results in more efficient access but less efficient utilization of disk space. As disks become larger and less expensive per byte, strategies that achieve efficient I/O throughput at the expense of some storage loss become increasingly attractive.", acknowledgement = ack-nhfb, affiliationaddress = "Dartmouth Coll, USA", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "buddy system; computer operating systems; data processing --- File Organization; disk file allocation; dynamic memory management; dynamic storage allocation; file system design; measurement; performance; Storage Allocation", subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, File organization. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Access methods. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Allocation/deallocation strategies. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Secondary storage. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf H.3.2} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization. {\bf E.5} Data, FILES, Organization/structure.", } @Article{Herzberg:1987:PPS, author = "Amir Herzberg and Shlomit S. 
Pinter", title = "Public Protection of Software", journal = j-TOCS, volume = "5", number = "4", pages = "371--393", month = nov, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-4/p371-herzberg/", abstract = "One of the overwhelming problems that software producers must contend with is the unauthorized use and distribution of their products. Copyright laws concerning software are rarely enforced, thereby causing major losses to the software companies. Technical means of protecting software from illegal duplication are required, but the available means are imperfect. We present protocols that enable software protection, without causing substantial overhead in distribution and maintenance. The protocols may be implemented by a conventional cryptosystem, such as the DES, or by a public key cryptosystem, such as the RSA. Both implementations are proved to satisfy required security criteria.", acknowledgement = ack-nhfb, affiliationaddress = "Technion-Israel Inst of Technology, Isr", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; computer software; cryptographic protocols; cryptography; design; Protection; public key cryptosystems; security; security protocols; single key cryptosystems", subject = "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Cryptographic controls. {\bf K.5.1} Computing Milieux, LEGAL ASPECTS OF COMPUTING, Hardware/Software Protection. {\bf E.3} Data, DATA ENCRYPTION, Public key cryptosystems. 
{\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection.", } @Article{Babaoglu:1987:RCB, author = "{\"O}zalp Babao{\u{g}}lu", title = "On the Reliability of Consensus-Based Fault-Tolerant Distributed Computing Systems", journal = j-TOCS, volume = "5", number = "4", pages = "394--416", month = nov, year = "1987", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-4/p394-babaoglu/", abstract = "Using a stochastic model of processor failure times, we investigate design choices such as replication level, protocol running time, randomized versus deterministic protocols, fault detection, and authentication. We use the probability with which a system produces the correct output as our evaluation criterion. This contrasts with previous fault-tolerance results that guarantee correctness only if the percentage of faulty processors in the system can be bounded. Our results reveal some subtle and counterintuitive interactions between the design parameters and system reliability.", acknowledgement = ack-nhfb, affiliationaddress = "Cornell Univ, USA", classification = "722; 723; 913", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "Byzantine agreement; computer systems, digital; design; deterministic protocols; distributed consensus; Fault Tolerant Capability; fault-tolerant distributed system; performance; randomized protocols; reliability", subject = "{\bf B.1.3} Hardware, CONTROL STRUCTURES AND MICROPROGRAMMING, Control Structure Reliability, Testing, and Fault-Tolerance**, Redundant design**. {\bf B.3.4} Hardware, MEMORY STRUCTURES, Reliability, Testing, and Fault-Tolerance**, Redundant design**. 
{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Reliability, availability, and serviceability.", } @Article{Jones:1988:PSI, author = "Anita K. Jones", title = "Preface: Special Issue on Operating Systems Principles", journal = j-TOCS, volume = "6", number = "1", pages = "1--2", month = feb, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 11:09:14 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Terry:1988:MSV, author = "Douglas B. Terry and Daniel C. Swinehart", title = "Managing Stored Voice in the {Etherphone} System", journal = j-TOCS, volume = "6", number = "1", pages = "3--27", month = feb, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-1/p3-terry/", abstract = "The voice manager in the Etherphone system provides facilities for recording, editing, and playing stored voice in a distributed personal-computing environment. To facilitate sharing, the voice manager stores voice on a special voice file server that is accessible via the local internet. Operations for editing a passage of recorded voice simply build persistent data structures to represent the edited voice. These data structures, implementing an abstraction called voice ropes, are stored in a server database and consist of lists of intervals within voice files. Clients refer to voice ropes solely by reference. 
Interests, additional persistent data structures maintained by the server, provide a sort of directory service for managing the voice ropes that have been created as well as a reliable reference-counting mechanism, permitting the garbage collection of voice ropes that are no longer needed.", acknowledgement = ack-nhfb, affiliationaddress = "XEROX Palo Alto Research Cent, Palo Alto, CA, USA", classification = "718; 723", conference = "1987 ACM\slash SIGOPS Symposium on Operating Systems Principles.", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "computer networks; computer operating systems; computer systems, digital --- Distributed; data processing --- Data Structures; design; digital communication systems; electronic mail; Etherphone system; management; performance; security; voice editing; voice file server; voice manager; Voice/Data Integrated Services", sponsor = "ACM, Special Interest Group on Operating Systems, New York, NY, USA", subject = "{\bf H.4.3} Information Systems, INFORMATION SYSTEMS APPLICATIONS, Communications Applications. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Allocation/deallocation strategies. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Storage hierarchies. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Access controls. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Cryptographic controls. {\bf E.2} Data, DATA STORAGE REPRESENTATIONS. {\bf H.2.8} Information Systems, DATABASE MANAGEMENT, Database Applications. {\bf H.4.3} Information Systems, INFORMATION SYSTEMS APPLICATIONS, Communications Applications, Electronic mail. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.", } @Article{Chang:1988:SAP, author = "Albert Chang and Mark F. 
Mergen", title = "801 Storage: Architecture and Programming", journal = j-TOCS, volume = "6", number = "1", pages = "28--50", month = feb, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-1/p28-chang/", abstract = "Based on novel architecture, the 801 minicomputer project has developed a low-level storage manager that can significantly simplify storage programming in subsystems and applications. The storage manager embodies three ideas: (1) large virtual storage, to contain all temporary data and permanent files for the active programs; (2) the innovation of database storage, which has implicit properties of access serializability and atomic update, similar to those of database transaction systems; and (3) access to all storage, including files, by the usual operations and types of a high-level programming language. The IBM RT PC implements the hardware architecture necessary for these storage facilities in its storage controller (MMU). The storage manager and language elements required, as well as subsystems and applications that use them, have been implemented and studied in a prototype operating system called CPR, that runs on the RT PC. Low cost and good performance are achieved in both hardware and software. The design is intended to be extensible across a wide performance\slash cost spectrum.", acknowledgement = ack-nhfb, affiliationaddress = "IBM T. J. 
Watson Research Cent, Yorktown Heights, NY, USA", classification = "723", conference = "1987 ACM\slash SIGOPS Symposium on Operating Systems Principles.", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "801 minicomputer project; algorithms; computer architecture; computer operating systems; computers, minicomputer; CPR operating system; design; experimentation; IBM RT PC; low-level storage manager; performance", sponsor = "ACM, Special Interest Group on Operating Systems, New York, NY, USA", subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Virtual memory. {\bf C.1.1} Computer Systems Organization, PROCESSOR ARCHITECTURES, Single Data Stream Architectures, RISC. {\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language Constructs and Features. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design.", } @Article{Howard:1988:SPD, author = "John H. Howard and Michael L. Kazar and Sherri G. Menees and David A. Nichols and M. Satyanarayanan and Robert N. Sidebotham and Michael J. West", title = "Scale and Performance in a Distributed File System", journal = j-TOCS, volume = "6", number = "1", pages = "51--81", month = feb, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-1/p51-howard/", abstract = "The Andrew File System is a location-transparent distributed file system that will eventually span more than 5000 workstations at Carnegie Mellon University. Large scale affects performance and complicates system operation. 
In this paper we present observations of a prototype implementation, motivate changes in the areas of cache validation, server process structure, name translation, and low-level storage representation, and quantitatively demonstrate Andrew's ability to scale gracefully. We establish the importance of whole-file transfer and caching in Andrew by comparing its performance with that of Sun Microsystem's NFS file system. We also show how the aggregation of files into volumes improves the operability of the system.", acknowledgement = ack-nhfb, affiliationaddress = "Carnegie Mellon Univ, Pittsburgh, PA, USA", classification = "723", conference = "1987 ACM\slash SIGOPS Symposium on Operating Systems Principles.", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "Andrew File System; computer operating systems; computer systems, digital --- Distributed; design; distributed file system; experimentation; file transfer; measurement; performance", sponsor = "ACM, Special Interest Group on Operating Systems, New York, NY, USA", subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements.", } @Article{Haskin:1988:RMQ, author = "Roger Haskin and Yoni Malachi and Wayne Sawdon and Gregory Chan", title = "Recovery Management in {QuickSilver}", journal = j-TOCS, volume = "6", number = "1", pages = "82--108", month = feb, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-1/p82-haskin/", abstract = "This paper describes QuickSilver, which uses atomic transactions as a unified failure recovery mechanism for a client-server structured distributed system. 
Transactions allow failure atomicity for related activities at a single server or at a number of independent servers. Rather than bundling transaction management into a dedicated language or recoverable object manager, QuickSilver exposes the basic commit protocol and log recovery primitives, allowing clients and servers to tailor their recovery techniques to their specific needs. Servers can implement their own log recovery protocols rather than being required to use a system-defined protocol. These decisions allow servers to make their own choices to balance simplicity, efficiency, and recoverability.", acknowledgement = ack-nhfb, affiliationaddress = "IBM, Almaden Research Cent, San Jose, CA, USA", classification = "723", conference = "1987 ACM\slash SIGOPS Symposium on Operating Systems Principles.", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "atomic transactions; computer operating systems; computer systems, digital --- Distributed; design; experimentation; failure atomicity; performance; QuickSilver; recovery management; reliability", sponsor = "ACM, Special Interest Group on Operating Systems, New York, NY, USA", subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, File organization. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Maintenance**. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, QuickSilver. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Checkpoint/restart. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Distributed databases. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. 
{\bf H.2.2} Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart.", } @Article{Jul:1988:FGM, author = "Eric Jul and Henry Levy and Norman Hutchinson and Andrew Black", title = "Fine-Grained Mobility in the {Emerald} System", journal = j-TOCS, volume = "6", number = "1", pages = "109--133", month = feb, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-1/p109-jul/", abstract = "Emerald is an object-based language and system designed for the construction of distributed programs. An explicit goal of Emerald is support for object mobility; objects in Emerald can freely move within the system to take advantage of distribution and dynamically changing environments. We say that Emerald has fine-grained mobility because Emerald objects can be small data objects as well as process objects. Fine-grained mobility allows us to apply mobility in new ways but presents implementation problems as well. This paper discusses the benefits of fine-grained mobility, the Emerald language and run-time mechanisms that support mobility, and techniques for implementing mobility that do not degrade the performance of local operations. 
Performance measurements of the current implementation are included.", acknowledgement = ack-nhfb, affiliationaddress = "Univ of Washington, Seattle, WA, USA", classification = "723", conference = "1987 ACM\slash SIGOPS Symposium on Operating Systems Principles.", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer operating systems; computer programming languages; computer systems, digital --- Distributed; design; distributed languages; emerald; languages; measurement; object-oriented languages; performance; process mobility", sponsor = "ACM, Special Interest Group on Operating Systems, New York, NY, USA", subject = "{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf D.2.6} Software, SOFTWARE ENGINEERING, Programming Environments. {\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Abstract data types. {\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Control structures. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf D.3.2} Software, PROGRAMMING LANGUAGES, Language Classifications, Emerald.", } @Article{Nelson:1988:CSN, author = "Michael N. Nelson and Brent B. Welch and John K. Ousterhout", title = "Caching in the {Sprite} Network File System", journal = j-TOCS, volume = "6", number = "1", pages = "134--154", month = feb, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-1/p134-nelson/", abstract = "The Sprite network operating system uses large main-memory disk block caches to achieve high performance in its file system. 
It provides non-write-through file caching on both client and server machines. A simple cache consistency mechanism permits files to be shared by multiple clients without danger of stale data. In order to allow the file cache to occupy as much memory as possible, the file system of each machine negotiates with the virtual memory system over physical memory usage and changes the size of the file cache dynamically. Benchmark programs indicate that client caches allow diskless Sprite workstations to perform within 0--12 percent of workstations with disks. In addition, client caching reduces server loading by 50 percent and network traffic by 90 percent.", acknowledgement = ack-nhfb, affiliationaddress = "Univ of California at Berkeley, Berkeley, CA, USA", classification = "723", conference = "1987 ACM\slash SIGOPS Symposium on Operating Systems Principles.", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "cache consistency; computer operating systems; computer systems, digital --- Distributed; design; distributed file caching; distributed file systems; measurement; performance; sprite network", sponsor = "ACM, Special Interest Group on Operating Systems, New York, NY, USA", subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Distributed memories. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Main memory. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Secondary storage. 
{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Virtual memory.", } @Article{Snodgrass:1988:RAM, author = "Richard Snodgrass", title = "A Relational Approach to Monitoring Complex Systems", journal = j-TOCS, volume = "6", number = "2", pages = "157--196", month = may, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-2/p157-snodgrass/", abstract = "Traditional monitoring techniques are inadequate when monitoring complex systems such as multiprocessors or distributed systems. A new approach is described in which a historical database forms the conceptual basis for the information processed by the monitor. This approach permits advances in specifying the low-level data collection, specifying the analysis of the collected data, performing the analysis, and displaying the results. Two prototype implementations demonstrate the feasibility of the approach.", acknowledgement = ack-nhfb, affiliationaddress = "Univ of North Carolina, NC, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer systems, digital; data processing --- Data Reduction and Analysis; database systems --- Relational; design; distributed systems; experimentation; languages; low-level data collection; measurement; Monitoring; multiprocessors; performance", subject = "{\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Monitors. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf D.2.6} Software, SOFTWARE ENGINEERING, Programming Environments. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. 
{\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing and Debugging. {\bf H.2.3} Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.3} Information Systems, DATABASE MANAGEMENT, Languages, QUEL. {\bf H.2.1} Information Systems, DATABASE MANAGEMENT, Logical Design, Data models.", } @Article{Sandhu:1988:NTD, author = "Ravinderpal S. Sandhu", title = "The {NTree}: a Two Dimension Partial Order for Protection Groups", journal = j-TOCS, volume = "6", number = "2", pages = "197--222", month = may, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-2/p197-sandhu/", abstract = "The benefits of providing access control with groups of users rather than with individuals as the unit of granularity are enhanced if the groups are organized in a subgroup partial order. A class of such partial orders, called ntrees, is defined by using a forest of rooted trees or inverted rooted trees as basic partial orders and combining these by refinement. Refinement explodes an existing group into a partially ordered ntree of new groups while maintaining the same relationship between each new group and the nonexploded groups that the exploded group had. 
Examples are discussed to show the practical significance of ntrees and the refinement operation.", acknowledgement = ack-nhfb, affiliationaddress = "Ohio State Univ, OH, USA", classification = "722; 723; 921", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "access control; computer systems, digital; data processing --- Security of Data; design; management; mathematical techniques --- Trees; ntree; protection groups; security; theory; two-dimensional partial order", subject = "{\bf H.2.0} Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection**. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection. {\bf K.6.m} Computing Milieux, MANAGEMENT OF COMPUTING AND INFORMATION SYSTEMS, Miscellaneous, Security*. {\bf H.3.3} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Search process. {\bf I.2.8} Computing Methodologies, ARTIFICIAL INTELLIGENCE, Problem Solving, Control Methods, and Search, Graph and tree search strategies.", } @Article{Gross:1988:MEM, author = "Thomas R. Gross and John L. Hennessy and Steven A. Przybylski and Christopher Rowen", title = "Measurement and Evaluation of the {MIPS} Architecture and Processor", journal = j-TOCS, volume = "6", number = "3", pages = "229--257", month = aug, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-3/p229-gross/", abstract = "MIPS is a 32-bit processor architecture that has been implemented as an nMOS VLSI chip. The instruction set architecture is RISC-based. Close coupling with compilers and efficient use of the instruction set by compiled programs were goals of the architecture. 
The MIPS architecture requires that the software implement some constraints in the design that are normally considered part of the hardware implementation. This paper presents experimental results on the effectiveness of this processor as a program host. Using sets of large and small benchmarks, the instruction and operand usage patterns are examined both for optimized and unoptimized code.", acknowledgement = ack-nhfb, affiliationaddress = "Stanford Univ, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "32-bit microprocessor; benchmarks; computer architecture --- Performance; computers, microcomputer; design; Evaluation; experimentation; measurement; MIPS; performance", subject = "{\bf C.1.1} Computer Systems Organization, PROCESSOR ARCHITECTURES, Single Data Stream Architectures, Pipeline processors**. {\bf C.0} Computer Systems Organization, GENERAL, Instruction set design. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Design studies. {\bf C.5.4} Computer Systems Organization, COMPUTER SYSTEM IMPLEMENTATION, VLSI Systems.", } @Article{Gifford:1988:RPP, author = "David K. Gifford and Nathan Glasser", title = "Remote Pipes and Procedures for Efficient Distributed Communication", journal = j-TOCS, volume = "6", number = "3", pages = "258--283", month = aug, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-3/p258-gifford/", abstract = "We describe a new communications model for distributed systems that combines the advantages of remote procedure call with the efficient transfer of bulk data. Three ideas form the basis of this model.
First, remote procedures are first-class values which can be freely exchanged among nodes, thus enabling a greater variety of protocols to be directly implemented in a remote procedure call framework. Second, a new type of abstract object, called a pipe, allows bulk data and incremental results to be efficiently transported in a type-safe manner. Third, the relative sequencing of pipes and procedures can be controlled by combining them into channel groups. Calls on the members of a channel group are guaranteed to be processed in order. Application experience with this model, which we call the Channel Model, is reported. Derived performance bounds and experimental measures are presented.", acknowledgement = ack-nhfb, affiliationaddress = "MIT, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "bulk data transfer; channel model; computer systems, digital; data transmission; design; Distributed; performance; performance bounds; remote procedure call", subject = "{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Network communications. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Performance attributes.", } @Article{Johnson:1988:SSR, author = "Dale M. Johnson and F. 
Javier Thayer", title = "Stating Security Requirements with Tolerable Sets", journal = j-TOCS, volume = "6", number = "3", pages = "284--295", month = aug, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-3/p284-johnson/", abstract = "This paper introduces and develops the concept of tolerable sets for analyzing general security requirements. Tolerable sets, and corresponding purging functions and invisibility based on the sets, are used to state and test such requirements. Some particular applications are described, and some critical remarks about purging functions are included.", acknowledgement = ack-nhfb, affiliationaddress = "MITRE Corp, USA", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "computer security requirements; computer systems, digital; data processing; purging functions; security; Security of Data; tolerable sets; verification", subject = "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Information flow controls. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Security kernels**. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Verification**. {\bf F.3.1} Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs, Specification techniques.", } @Article{Colwell:1988:PEA, author = "Robert P. Colwell and Edward F. Gehringer and E. 
Douglas Jensen", title = "Performance Effects of Architectural Complexity in the {Intel 432}", journal = j-TOCS, volume = "6", number = "3", pages = "296--339", month = aug, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-3/p296-colwell/", abstract = "The Intel 432 is noteworthy as an architecture incorporating a large amount of functionality that most other systems perform by software. This paper examines the performance impact of the incorporation of several kinds of functionality. Among these are the addressing structure, the caches, instruction alignment, the buses, and the way that garbage collection is handled. A set of several benchmarks is used to quantify the performance effect of each of these decisions. The results indicate that the 432 could have been speeded up very significantly if a small number of implementation decisions had been made differently, and if incrementally better technology had been used in its construction.", acknowledgement = ack-nhfb, affiliationaddress = "Multiflow Computer Inc", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "architectural complexity; computer programming; computer systems, digital --- Parallel Processing; design; Intel 432; measurement; object-based programming environment; performance; Performance; security", subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Design studies. {\bf B.5.m} Hardware, REGISTER-TRANSFER-LEVEL IMPLEMENTATION, Miscellaneous. 
{\bf C.1.1} Computer Systems Organization, PROCESSOR ARCHITECTURES, Single Data Stream Architectures, Single-instruction-stream, single-data-stream processors (SISD)**. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Multiple-instruction-stream, multiple-data-stream processors (MIMD). {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Parallel processors**. {\bf C.1.3} Computer Systems Organization, PROCESSOR ARCHITECTURES, Other Architecture Styles, Capability architectures**. {\bf C.1.3} Computer Systems Organization, PROCESSOR ARCHITECTURES, Other Architecture Styles, High-level language architectures**. {\bf C.1.3} Computer Systems Organization, PROCESSOR ARCHITECTURES, Other Architecture Styles, Stack-oriented processors**. {\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors, Compilers.", } @Article{Peterson:1988:PNS, author = "Larry L. Peterson", title = "The {Profile} Naming Service", journal = j-TOCS, volume = "6", number = "4", pages = "341--364", month = nov, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-4/p341-peterson/", abstract = "Profile is a descriptive naming service used to identify users and organizations. This paper presents a structural overview of Profile's three major components: a confederation of attribute-based name servers, a name space abstraction that unifies the name servers, and a user interface that integrates the name space with existing naming systems. 
Each name server is an independent authority that allows clients to describe users and organizations with a multiplicity of attributes; the name space abstraction is a client program that implements a discipline for searching a sequence of name servers; and the interface provides a tool with which users build customized commands. Experience with an implementation in the DARPA\slash NSF Internet demonstrates that Profile is a feasible and effective mechanism for naming users and organizations in a large internet.", acknowledgement = ack-nhfb, affiliation = "Univ of Arizona", affiliationaddress = "Tucson, AZ, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "Attribute-Based Name Servers; Computer Networks; Computer Programming --- Algorithms; Computer Systems, Digital; DARPA-NSF Internet; Database Systems --- Distributed; design; Distributed; human factors; Name Space Abstraction; Naming Service; Profile; User Interface", subject = "{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Distributed databases. {\bf H.3.4} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Systems and Software, Question-answering (fact retrieval) systems**. {\bf H.3.3} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Search process.", } @Article{Atkins:1988:ESD, author = "M. 
Stella Atkins", title = "Experiments in {SR} with Different Upcall Program Structures", journal = j-TOCS, volume = "6", number = "4", pages = "365--392", month = nov, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-4/p365-atkins/", abstract = "This paper explores program designs for layered systems such as communication protocols and server\slash client systems that do not exhibit a strict hierarchy in their control flow. D. D. Clark (1985) has proposed structuring such systems, where both upward and downward control flow are required, to use efficient synchronous procedure calls between the layers whenever possible. The term upcall is used by Clark to describe this synchronous upward communication from server to client. Several techniques are possible for structuring such programs using upcalls. Comparisons are made by implementing a communication protocol described by Clark in three different ways. The first method implements all the protocol routines in a single large module. The second method structures the routines into modules occupying vertical slices of the protocol layers, and the third method structures the routines into modules corresponding to the protocol layers. It is concluded that the vertically layered protocol design is to be preferred unless there are many shared variables between the send-side and receive-side, as it is very efficient and provides the best protection of clients from each other. 
The horizontally layered design is the least efficient, but it is the easiest to program.", acknowledgement = ack-nhfb, affiliation = "Simon Fraser Univ", affiliationaddress = "Burnaby, BC, Can", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "Computer Networks --- Protocols; Computer Programming --- Algorithms; Computer Software; design; Design; languages; Layered Systems; performance; Server/Client Systems; Upcall Program Structures", subject = "{\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming. {\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Concurrent programming structures. {\bf D.3.2} Software, PROGRAMMING LANGUAGES, Language Classifications, SR. {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Buffering. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Hierarchical design**. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. 
{\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Simulation.", } @Article{Agarwal:1988:CPO, author = "Anant Agarwal and John Hennessy and Mark Horowitz", title = "Cache Performance of Operating System and Multiprogramming Workloads", journal = j-TOCS, volume = "6", number = "4", pages = "393--431", month = nov, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-4/p393-agarwal/", abstract = "Large caches are necessary in current high-performance computer systems to provide the required high memory bandwidth. Because a small decrease in cache performance can result in significant system performance degradation, accurately characterizing the performance of large caches is important. Although measurements on actual systems have shown that operating systems and multiprogramming can affect cache performance, previous studies have not focused on these effects. We have developed a program tracing technique called ATUM (Address Tracing Using Microcode) that captures realistic traces of multitasking workloads including the operating system. Examining cache behavior using these traces from a VAX processor shows that both the operating system and multiprogramming activity significantly degrade cache performance, with an even greater proportional impact on large caches. From a careful analysis of the causes of this degradation, we explore various techniques to reduce this loss. 
While seemingly little can be done to mitigate the effect of system references, multitasking cache miss activity can be substantially reduced with small hardware additions.", acknowledgement = ack-nhfb, affiliation = "Stanford Univ", affiliationaddress = "Stanford, CA, USA", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "Address Tracing Using Microcode; ATUM; Cache Performance; Computer Operating Systems; Computer Systems Programming --- Multiprogramming; design; measurement; Multiprogramming Workloads; performance; Performance; Program Tracing", subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Associative memories. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Virtual memory. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Formal models**. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Simulation**. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Design studies. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Measurement techniques. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Modeling techniques. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Multiprocessing/multiprogramming/multitasking. 
{\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements.", } @Article{Okamoto:1988:DMS, author = "Tatsuaki Okamoto", title = "A Digital Multisignature Scheme using Bijective Public-Key Cryptosystems", journal = j-TOCS, volume = "6", number = "4", pages = "432--441", month = nov, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-4/p432-okamoto/", abstract = "A new digital multisignature scheme using bijective public-key cryptosystems that overcomes the problems of previous signature schemes used for multisignatures is proposed. The principal features of this scheme are (1) the length of a multisignature message is nearly equivalent to that for a single signature message; (2) by using a one-way hash function, multisignature generation and verification are processed in an efficient manner; (3) the order of signing is not restricted; and (4) this scheme can be constructed on any bijective public-key cryptosystem as well as the RSA scheme. In addition, it is shown that the new scheme is considered as safe as the public-key cryptosystem used in this new scheme. 
Some variations based on the scheme are also presented.", acknowledgement = ack-nhfb, affiliation = "NTT", affiliationaddress = "Yokosuka, Jpn", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "Bijective Public-Key Cryptosystems; Computer-Based Message Systems; Cryptography; Digital Multisignature Scheme; Electronic Mail; One-Way Hash Function; security", subject = "{\bf E.3} Data, DATA ENCRYPTION, Public key cryptosystems.", } @Article{Borg:1989:FTU, author = "Anita Borg and Wolfgang Blau and Wolfgang Graetsch and Ferdinand Herrmann and Wolfgang Oberle", title = "Fault Tolerance under {UNIX}", journal = j-TOCS, volume = "7", number = "1", pages = "1--24", month = feb, year = "1989", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-1/p1-borg/", abstract = "The initial design for a distributed, fault-tolerant version of UNIX based on three-way atomic message transmission was presented in an earlier paper. This paper describes the working system, now known as the TARGON\slash 32. The original design left open questions in at least two areas: fault tolerance for server processes and recovery after a crash were briefly and inaccurately sketched; rebackup after recovery was not discussed at all. The fundamental design involving three-way message transmission has remained unchanged. However, server backup has been redesigned and is now more consistent with that of normal user processes. Recovery and rebackup have been completed in a less centralized and thus more efficient manner. We review important aspects of the original design and note how the implementation differs from our original ideas. 
We then focus on the backup and recovery for server processes and the changes and additions in the design and implementation of recovery and rebackup.", acknowledgement = ack-nhfb, affiliation = "Nixdorf Computer GmbH", affiliationaddress = "Paderborn, West Ger", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; Computer Architecture; Computer Operating Systems; Computer Systems, Digital; Crash Handling; Fault Tolerant Capability; Multiway Message Transmission; reliability; Roll Forward Recovery; Server Architecture; TARGON/32; UNIX", subject = "{\bf D.4.0} Software, OPERATING SYSTEMS, General, UNIX. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Backup procedures. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Checkpoint/restart. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Associative processors. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Message sending.", } @Article{Pittelli:1989:RST, author = "Frank M. Pittelli and H{\'e}ctor Garc{\'\i}a-Molina", title = "Reliable Scheduling in a {TMR} Database System", journal = j-TOCS, volume = "7", number = "1", pages = "25--60", month = feb, year = "1989", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-1/p25-pittelli/", abstract = "A Triple Modular Redundant (TMR) system achieves high reliability by replicating data and all processing at three independent nodes. 
When TMR is used for database processing all nonfaulty computers must execute the same sequence of transactions, and this is ensured by a collection of processes known as schedulers. In this paper we study the implementation of efficient schedulers through analysis of various enhancements such as null transactions and message batching. The schedulers have been implemented in an experimental TMR system and the evaluation results are presented here.", acknowledgement = ack-nhfb, affiliation = "US Naval Acad", affiliationaddress = "USA", classification = "723; 913", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; Database Systems; design; Distributed; Message Batching; Null Transactions; performance; reliability; Reliability; Reliable Scheduling; Scheduling; Transaction Processing; Triple Modular Redundancy", subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Reliability, availability, and serviceability. {\bf H.2.0} Information Systems, DATABASE MANAGEMENT, General. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Scheduling.", } @Article{Raymond:1989:TBA, author = "Kerry Raymond", title = "A Tree-Based Algorithm for Distributed Mutual Exclusion", journal = j-TOCS, volume = "7", number = "1", pages = "61--77", month = feb, year = "1989", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-1/p61-raymond/", abstract = "We present an algorithm for distributed mutual exclusion in a computer network of N nodes that communicate by messages rather than shared memory. 
The algorithm uses a spanning tree of the computer network, and the number of messages exchanged per critical section depends on the topology of this tree. However, typically the number of messages exchanged is O(log N) under light demand, and reduces to approximately four messages under saturated demand. Each node holds information only about its immediate neighbors in the spanning tree rather than information about all nodes, and failed nodes can recover necessary information from their neighbors. The algorithm does not require sequence numbers as it operates correctly despite message overtaking.", acknowledgement = ack-nhfb, affiliation = "Univ of Queensland", affiliationaddress = "St. Lucia, Aust", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; Computer Networks; Computer Programming--Algorithms; Computer Systems, Digital; design; Distributed; Distributed Mutual Exclusion; Mathematical Techniques--Trees; Message Passing; Tree Based Algorithms", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Mutual exclusion. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Synchronization. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Message sending.", } @Article{Thompson:1989:ESA, author = "James G. 
Thompson and Alan Jay Smith", title = "Efficient (Stack) Algorithms for Analysis of Write-Back and Sector Memories", journal = j-TOCS, volume = "7", number = "1", pages = "78--117", month = feb, year = "1989", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-1/p78-thompson/", abstract = "For the class of replacement algorithms known as stack algorithms, existing analysis techniques permit the computation of memory miss ratios for all memory sizes simultaneously in one pass over a memory reference string. We extend the class of computations possible by this methodology in two ways. First, we show how to compute the effects of copy-backs in write-back caches. The key observation here is that a given block is clean for all memory sizes less than or equal to C blocks and is dirty for all larger memory sizes. Our technique permits efficient computations for algorithms or systems using periodic write-back and\slash or block deletion. The second extension permits stack analysis simulation for sector (or subblock) caches in which a sector (associated with an address tag) consists of subsectors (or subblocks) that can be loaded independently. The key observation here is that a subsector is present only in caches of size C or greater. Load forward prefetching in a sector cache is shown to be a stack algorithm and is easily simulated using our technique. 
Running times for our methods are only slightly higher than for a simulation of a single memory size using nonstack techniques.", acknowledgement = ack-nhfb, affiliation = "US Air Force", affiliationaddress = "USA", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; Cache Memories; Computer Programming--Algorithms; Data Storage, Digital; design; experimentation; measurement; Memory System Performance; performance; Performance; Replacement Algorithms; Sector Memories; Stack Algorithms; theory; Write Back Memories", subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles. {\bf B.6.1} Hardware, LOGIC DESIGN, Design Styles, Memory control and access**. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Simulation**. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance.", } @Article{Gupta:1989:HSI, author = "Anoop Gupta and Charles Forgy and Allen Newell", title = "High-speed Implementations of Rule-Based Systems", journal = j-TOCS, volume = "7", number = "2", pages = "119--146", month = may, year = "1989", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-2/p119-gupta/", abstract = "We explore various methods for speeding up the execution of rule-based systems. In particular, we examine the role of parallelism in the high-speed execution of rule-based systems and study the architectural issues in the design of computers for rule-based systems. Our results show that contrary to initial expectations, the speedup that can be obtained from parallelism is quite limited, only about tenfold. 
The reasons for the small speed-up are: (1) the small number of rules relevant to each change to data memory; (2) the large variation in the processing requirements of relevant rules; and (3) the small number of changes made to data memory between synchronization steps. Furthermore, we observe that to obtain this limited factor of tenfold speed-up, it is necessary to exploit parallelism at a very fine granularity. We propose that a suitable architecture to exploit such fine-grain parallelism is a shared-memory multiprocessor with 32--64 processors.", acknowledgement = ack-nhfb, affiliation = "Stanford Univ", affiliationaddress = "Stanford, CA, USA", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; Artificial Intelligence; Computer Architecture; Computer Systems, Digital--Parallel Processing; design; languages; performance; Production Systems; Rule Based Systems; Shared Memory Multiprocessors; Speedup", subject = "{\bf I.2.5} Computing Methodologies, ARTIFICIAL INTELLIGENCE, Programming Languages and Software. {\bf I.2.5} Computing Methodologies, ARTIFICIAL INTELLIGENCE, Programming Languages and Software, OPS5. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Multiple-instruction-stream, multiple-data-stream processors (MIMD). {\bf G.1.0} Mathematics of Computing, NUMERICAL ANALYSIS, General, Parallel algorithms.", } @Article{Cheriton:1989:DGN, author = "David R. Cheriton and Timothy P.
Mann", title = "Decentralizing a Global Naming Service for Improved Performance and Fault Tolerance", journal = j-TOCS, volume = "7", number = "2", pages = "147--183", month = may, year = "1989", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-2/p147-cheriton/", abstract = "We address the problem of a global naming system, proposing a three-level naming architecture that consists of global, administrational, and managerial naming mechanisms, each optimized to meet the performance, reliability, and security requirements at its own level. We focus in particular on a decentralized approach to the lower levels, in which naming is handled directly by the managers of the named objects. Client-name caching and multicast are exploited to implement name mapping with almost optimum performance and fault tolerance. We also show how the naming system can be made secure. Our conclusions are bolstered by experience with an implementation in the V distributed operating system.", acknowledgement = ack-nhfb, affiliation = "Stanford Univ", affiliationaddress = "Stanford, CA, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "Computer Fault Tolerance; Computer Operating Systems; Computer Systems, Digital; design; Distributed; Distributed File Systems; experimentation; Global Naming Service; measurement; performance; reliability", subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. 
{\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection.", } @Article{Agarwal:1989:ACM, author = "Anant Agarwal and Mark Horowitz and John Hennessy", title = "An Analytical Cache Model", journal = j-TOCS, volume = "7", number = "2", pages = "184--215", month = may, year = "1989", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-2/p184-agarwal/", abstract = "Trace-driven simulation and hardware measurement are the techniques most often used to obtain accurate performance figures for caches. The former requires a large amount of simulation time to evaluate each cache configuration while the latter is restricted to measurements of existing caches. An analytical cache model that uses parameters extracted from address traces of programs can efficiently provide estimates of cache performance and show the effects of varying cache parameters. By representing the factors that affect cache performance, we develop an analytical model that gives miss rates for a given trace as a function of cache size, degree of associativity, block size, subblock size, multiprogramming level, task switch interval, and observation interval. 
The predicted values closely approximate the results of trace-driven simulations, while requiring only a small fraction of the computation cost.", acknowledgement = ack-nhfb, affiliation = "Stanford Univ", affiliationaddress = "Stanford, CA, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "Cache Miss Rate; Cache Models; Computer Architecture; Data Storage Units; design; measurement; Memory Structures; performance; theory; Trace Driven Simulation", subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Formal models**. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Simulation**. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Multiprocessing/multiprogramming/multitasking.", } @Article{Peterson:1989:PUC, author = "Larry L. Peterson and Nick C. Buchholz and Richard D. Schlichting", title = "Preserving and Using Context Information in Interprocess Communication", journal = j-TOCS, volume = "7", number = "3", pages = "217--246", month = aug, year = "1989", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-3/p217-peterson/", abstract = "When processes in a network communicate, the messages they exchange define a partial ordering of externally visible events. While the significance of this partial order in distributed computing is well understood, it has not been made an explicit part of the communication substrate upon which distributed programs are implemented. 
This paper describes a new interprocess communication mechanism, called Psync, that explicitly encodes this partial ordering with each message. The paper shows how Psync can be efficiently implemented on an unreliable communications network, and it demonstrates how conversations serve as an elegant foundation for ordering messages exchanged in a distributed computation and for recovering from processor failures.", acknowledgement = ack-nhfb, affiliation = "Univ of Arizona", affiliationaddress = "Tucson, AZ, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "Computer Fault Tolerance; Computer Programming--Algorithms; Computer Systems, Digital; Context Information; Database Systems--Distributed; design; Distributed; Interprocess Communication; Partial Ordering; performance; Psync Protocol; reliability", subject = "{\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Psync. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Network communication. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Design studies.", } @Article{Satyanarayanan:1989:ISL, author = "M. 
Satyanarayanan", title = "Integrating Security in a Large Distributed System", journal = j-TOCS, volume = "7", number = "3", pages = "247--280", month = aug, year = "1989", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-3/p247-satyanarayanan/", abstract = "Andrew is a distributed computing environment that is a synthesis of the personal computing and timesharing paradigms. When mature, it is expected to encompass over 5,000 workstations spanning the Carnegie Mellon University campus. This paper examines the security issues that arise in such an environment and describes the mechanisms that have been developed to address them. These mechanisms include the logical and physical separation of servers and clients, support for secure communication at the remote procedure call level, a distributed authentication service, a file-protection scheme that combines access lists with UNIX mode bits, and the use of encryption as a basic building block. The paper also discusses the assumptions underlying security in Andrew and analyzes the vulnerability of the system. 
Usage experience reveals that resource control, particularly of workstation CPU cycles, is more important than originally anticipated and that the mechanisms available to address this issue are rudimentary.", acknowledgement = ack-nhfb, affiliation = "Carnegie Mellon Univ", affiliationaddress = "Pittsburgh, PA, USA", classification = "722; 723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "algorithms; Andrew Distributed Computing Environment; Computer Security; Computer Systems, Digital; Computers, Personal; Cryptography; design; Distributed; security; Time Sharing", subject = "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection. {\bf C.0} Computer Systems Organization, GENERAL, Andrew. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf E.3} Data, DATA ENCRYPTION, Data encryption standard (DES)**.", } @Article{Shankar:1989:VDT, author = "A. Udaya Shankar", title = "Verified Data Transfer Protocols with Variable Flow Control", journal = j-TOCS, volume = "7", number = "3", pages = "281--316", month = aug, year = "1989", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-3/p281-shankar/", abstract = "We present and verify a sliding window protocol which uses modulo-N sequence numbers to achieve reliable flow-controlled data transfer between a producer and a consumer connected by unreliable channels. The consumer's data needs are represented by a receive window whose size can vary with time. 
The producer entity sends segments of data words that lie within the consumer's receive window. The consumer entity sends acknowledgement, selective acknowledgement, and selective reject messages that inform the producer entity of the current receive window size, the data word next expected, and the reception (or lack of reception) of out-of-sequence data segments. Our protocol is, therefore, a proper extension of existing transport and data link protocol standards such as TCP, ISO TP, HDLC, ADCCP, and so forth.", acknowledgement = ack-nhfb, affiliation = "Univ of Maryland", affiliationaddress = "College Park, MD, USA", classification = "723", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", journalabr = "ACM Trans Comput Syst", keywords = "Computer Networks--Protocols; Data Transfer Protocols; Data Transmission; design; Reliability; Sliding Window Protocol; theory; Variable Flow Control; verification", subject = "{\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol verification. {\bf C.3} Computer Systems Organization, SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS, Real-time and embedded systems. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Modeling and prediction. {\bf F.3.1} Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs. 
{\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management.", } @Article{Li:1989:MCS, author = "Kai Li and Paul Hudak", title = "Memory Coherence in Shared Virtual Memory Systems", journal = j-TOCS, volume = "7", number = "4", pages = "321--359", month = nov, year = "1989", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-4/p321-li/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; experimentation; measurement; performance", subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Shared memory. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Interconnection architectures. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Virtual memory. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications.", } @Article{Ng:1989:UHI, author = "Tony P. Ng", title = "Using Histories to Implement Atomic Objects", journal = j-TOCS, volume = "7", number = "4", pages = "360--393", month = nov, year = "1989", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-4/p360-ng/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design", subject = "{\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. 
{\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Distributed databases. {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Synchronization. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems.", } @Article{Barbara:1989:IAU, author = "Daniel Barbara and H{\'e}ctor Garc{\'\i}a-Molina and Annemarie Spauster", title = "Increasing Availability under Mutual Exclusion Constraints with Dynamic Vote Reassignment", journal = j-TOCS, volume = "7", number = "4", pages = "394--426", month = nov, year = "1989", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-4/p394-barbara/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; performance; reliability", subject = "{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Mutual exclusion. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability.", } @Article{Schroeder:1990:PFR, author = "Michael D. 
Schroeder and Michael Burrows", title = "Performance of the {Firefly RPC}", journal = j-TOCS, volume = "8", number = "1", pages = "1--17", month = feb, year = "1990", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-1/p1-schroeder/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "measurement; performance", subject = "{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors). {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Measurement techniques. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements.", } @Article{Burrows:1990:LA, author = "Michael Burrows and Martin Abadi and Roger Needham", title = "A Logic of Authentication", journal = j-TOCS, volume = "8", number = "1", pages = "18--36", month = feb, year = "1990", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-1/p18-burrows/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "security; theory; verification", subject = "{\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol verification. {\bf C.2.0} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, General, Security and protection (e.g., firewalls). 
{\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Authentication. {\bf F.3.1} Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Cryptographic controls.", } @Article{Bershad:1990:LRP, author = "Brian N. Bershad and Thomas E. Anderson and Edward D. Lazowska and Henry M. Levy", title = "Lightweight Remote Procedure Call", journal = j-TOCS, volume = "8", number = "1", pages = "37--55", month = feb, year = "1990", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-1/p37-bershad/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; measurement; performance; security", subject = "{\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf C.1.3} Computer Systems Organization, PROCESSOR ARCHITECTURES, Other Architecture Styles, Capability architectures**. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Security kernels**. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements.", } @Article{Anderson:1990:SCM, author = "David P. 
Anderson and Ron Kuivila", title = "A System for Computer Music Performance", journal = j-TOCS, volume = "8", number = "1", pages = "56--82", month = feb, year = "1990", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-1/p56-anderson/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; experimentation; human factors; languages; performance", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Interactive systems. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Real-time systems and embedded systems. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Input/output.", } @Article{Deering:1990:MRD, author = "Stephen E. Deering and David R. Cheriton", title = "Multicast Routing in Datagram Internetworks and Extended {LANs}", journal = j-TOCS, volume = "8", number = "2", pages = "85--110", month = may, year = "1990", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-2/p85-deering/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; performance", subject = "{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Network communications. 
{\bf C.2.5} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Local and Wide-Area Networks. {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol architecture. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.", } @Article{Schwan:1990:TDO, author = "Karsten Schwan and Win Bo", title = "``Topologies'' --- Distributed Objects on Multicomputers", journal = j-TOCS, volume = "8", number = "2", pages = "111--157", month = may, year = "1990", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-2/p111-schwan/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; measurement; performance", subject = "{\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Parallel programming. {\bf G.1.0} Mathematics of Computing, NUMERICAL ANALYSIS, General, Parallel algorithms. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Parallel processors**. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Message sending.", } @Article{Ramakrishnan:1990:BFS, author = "K. K. Ramakrishnan and R. 
Jain", title = "A Binary Feedback Scheme for Congestion Avoidance in Computer Networks", journal = j-TOCS, volume = "8", number = "2", pages = "158--181", month = may, year = "1990", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-2/p158-ramakrishnan/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; performance", subject = "{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Network communications. {\bf C.2.3} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Operations, Network monitoring. {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols.", } @Article{Benson:1990:FPM, author = "Glenn S. Benson and Ian F. Akyildiz and William F. Appelbe", title = "A Formal Protection Model of Security in Centralized, Parallel, and Distributed Systems", journal = j-TOCS, volume = "8", number = "3", pages = "183--213", month = aug, year = "1990", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-3/p183-benson/", abstract = "One way to show that a system is not secure is to demonstrate that a malicious or mistake-prone user or program can break security by causing the system to reach a nonsecure state. A fundamental aspect of a security model is a proof that validates that every state reachable from a secure initial state is secure.
A sequential security model assumes that every command that acts as a state transition executes sequentially, while a concurrent security model assumes that multiple commands execute concurrently. This paper presents a security model called the Centralized-Parallel-Distributed model (CPD model) that defines security for logically, or physically centralized, parallel, and distributed systems. The purpose of the CPD model is to define concurrency conditions that guarantee that a concurrent system cannot reach a state in which privileges are configured in a nonsecure manner. As an example, the conditions are used to construct a representation of a distributed system.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; security; theory; verification", subject = "{\bf C.2.0} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, General, Security and protection (e.g., firewalls). {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Parallel processors**. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf F.3.1} Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Access controls.", } @Article{King:1990:DAM, author = "Richard P. 
King", title = "Disk Arm Movement in Anticipation of Future Requests", journal = j-TOCS, volume = "8", number = "3", pages = "214--229", month = aug, year = "1990", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-3/p214-king/", abstract = "When a disk drive's access arm is idle, it may not be at the ideal location. In anticipation of future requests, movement to some other location may be advantageous. The effectiveness of anticipatory disk arm movement is explored. Various operating conditions are considered, and the reduction in seek distances and request response times is determined for them. Suppose that successive requests are independent and uniformly distributed. By bringing the arm to the middle of its range of motion when it is idle, the expected seek distance can be reduced by 25 percent. Nonlinearity in time versus distance can whittle that 25 percent reduction down to a 13 percent reduction in seek time. Nonuniformity in request location, nonPoisson arrival processes, and high arrival rates can whittle the reduction down to nothing. However, techniques are discussed that maximize those savings that are still possible under those circumstances. Various systems with multiple arms are analyzed. Usually, it is best to spread out the arms over the disk area. The both arms should be brought to the middle.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; performance", subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Secondary storage. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Modeling and prediction. 
{\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Simulation.", } @Article{Mitchell:1990:EPA, author = "Chad L. Mitchell and Michael J. Flynn", title = "The Effects of Processor Architecture on Instruction Memory Traffic", journal = j-TOCS, volume = "8", number = "3", pages = "230--250", month = aug, year = "1990", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-3/p230-mitchell/", abstract = "The relative amount of instruction traffic for two architectures is about the same in the presence of a large cache as with no cache. Furthermore, the presence of an intermediate-sized cache probably substantially favors the denser architecture. Encoding techniques have a much greater impact on instruction traffic than do the differences between instruction set families such as stack and register set. However, register set architectures have somewhat lower instruction traffic than directly comparable stack architectures if some local variables are allocated in registers. This study has clearly indicated that cache factors should be taken into consideration when making architectural tradeoffs. The differences in memory traffic between two architectures may be greatly amplified in the presence of a cache.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; performance", subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories. {\bf C.0} Computer Systems Organization, GENERAL, Instruction set design. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Performance attributes.
{\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Simulation**.", } @Article{Gotzhein:1990:DPS, author = "Reinhard Gotzhein and Gregor von Bochmann", title = "Deriving Protocol Specifications from Service Specifications Including Parameters", journal = j-TOCS, volume = "8", number = "4", pages = "255--283", month = nov, year = "1990", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-4/p255-gotzhein/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; verification", subject = "{\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol architecture. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.", } @Article{Marzullo:1990:TFC, author = "Keith Marzullo", title = "Tolerating Failures of Continuous-Valued Sensors", journal = j-TOCS, volume = "8", number = "4", pages = "284--304", month = nov, year = "1990", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-4/p284-marzullo/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; performance", subject = "{\bf C.3} Computer Systems Organization, SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS, Process control systems. 
{\bf F.3.1} Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs.", } @Article{Lamport:1990:CRW, author = "Leslie Lamport", title = "Concurrent Reading and Writing of Clocks", journal = j-TOCS, volume = "8", number = "4", pages = "305--310", month = nov, year = "1990", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-4/p305-lamport/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; verification", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming. {\bf D.2.4} Software, SOFTWARE ENGINEERING, Software/Program Verification. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management.", } @Article{Goldszmidt:1990:HLL, author = "German S. Goldszmidt and Shaula Yemini", title = "High-level Language Debugging for Concurrent Programs", journal = j-TOCS, volume = "8", number = "4", pages = "311--336", month = nov, year = "1990", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-4/p311-goldszmidt/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "experimentation; verification", subject = "{\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing and Debugging, Debugging aids. 
{\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming.", } @Article{Agrawal:1991:EFT, author = "Divyakant Agrawal and Amr {El Abbadi}", title = "An Efficient and Fault-Tolerant Solution for Distributed Mutual Exclusion", journal = j-TOCS, volume = "9", number = "1", pages = "1--20", month = feb, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-1/p1-agrawal/", abstract = "In this paper, we present an efficient and fault-tolerant algorithm for generating quorums to solve the distributed mutual exclusion problem. The algorithm uses a logical tree organization of the network to generate tree quorums, which are logarithmic in the size of the network in the best case. Our approach is resilient to both site and communication failures, even when such failures lead to network partitioning. Furthermore, the algorithm exhibits a property of graceful degradation, i.e., it requires more messages only as the number of failures increase in the network. We describe how tree quorums can be used for various distributed applications for providing mutually exclusive access to a distributed resource, managing replicated objects, and atomically committing a distributed transaction.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; reliability", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Mutual exclusion. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. 
{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.", } @Article{Mellor-Crummey:1991:ASS, author = "John M. Mellor-Crummey and Michael L. Scott", title = "Algorithms for Scalable Synchronization on Shared-Memory Multiprocessors", journal = j-TOCS, volume = "9", number = "1", pages = "21--65", month = feb, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-1/p21-mellor-crummey/", abstract = "Busy-wait techniques are heavily used for mutual exclusion and barrier synchronization in shared-memory parallel programs. Unfortunately, typical implementations of busy-waiting tend to produce large amounts of memory and interconnect contention, introducing performance bottlenecks that become markedly more pronounced as applications scale. We argue that this problem is not fundamental, and that one can in fact construct busy-wait synchronization algorithms that induce no memory or interconnect contention. The key to these algorithms is for every processor to spin on separate locally-accessible flag variables, and for some other processor to terminate the spin with a single remote write operation at an appropriate time. Flag variables may be locally-accessible as a result of coherent caching, or by virtue of allocation in the local portion of physically distributed shared memory. We present a new scalable algorithm for spin locks that generates $O(1)$ remote references per lock acquisition, independent of the number of processors attempting to acquire the lock. Our algorithm provides reasonable latency in the absence of contention, requires only a constant amount of space per lock, and requires no hardware support other than a swap-with-memory instruction.
We also present a new scalable barrier algorithm that generates $O(1)$ remote references per processor reaching the barrier, and observe that two previously-known barriers can likewise be cast in a form that spins only on locally-accessible flag variables. None of these barrier algorithms requires hardware support beyond the usual atomicity of memory reads and writes. We compare the performance of our scalable algorithms with other software approaches to busy-wait synchronization on both a Sequent Symmetry and a BBN Butterfly. Our principal conclusion is that contention due to synchronization need not be a problem in large-scale shared-memory multiprocessors. The existence of scalable algorithms greatly weakens the case for costly special-purpose hardware support for synchronization, and provides a case against so-called ``dance hall'' architectures, in which shared memory locations are equally far from all processors. ---From the Authors' Abstract", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; measurement; performance", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Synchronization. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Shared memory. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Mutual exclusion. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Interconnection architectures. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Storage hierarchies. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements.
{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Design studies.", } @Article{Huguet:1991:ASR, author = "Miquel Huguet and Tom{\'a}s Lang", title = "Architectural Support for Reduced Register Saving\slash Restoring in Single-Window Register Files", journal = j-TOCS, volume = "9", number = "1", pages = "66--97", month = feb, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-1/p66-huguet/", abstract = "The use of registers in a processor reduces the data and instruction memory traffic. Since this reduction is a significant factor in the improvement of the program execution time, recent VLSI processors have a large number of registers which can be used efficiently because of the advances in compiler technology. However, since registers have to be saved/restored across function calls, the corresponding register saving and restoring (RSR) memory traffic can almost eliminate the overall reduction. This traffic has been reduced by compiler optimizations and by providing multiple-window register files. Although these multiple-window architectures produce a large reduction in the RSR traffic, they have several drawbacks which make the single-window file preferable. We consider a combination of {\em hardware support\/} and {\em compiler optimizations\/} to reduce the RSR traffic for a single-window register file, beyond the reductions achieved by compiler optimizations alone. Basically, this hardware keeps track of the registers that are written during execution, so that the number of registers saved is minimized. 
Moreover, hardware is added so that a register is saved in the activation record of the function that uses it (instead of in the record of the current function); in this way a register is restored only when it is needed, rather than wholesale on procedure return. We present a register saving and restoring policy that makes use of this hardware, discuss its implementation, and evaluate the traffic reduction when the policy is combined with intraprocedural and interprocedural compiler optimizations. We show that, on the average for the four general-purpose programs measured, the RSR traffic is reduced by about 90 percent for a small register file (i.e., 32 registers), which results in an overall data memory traffic reduction of about 15 percent.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; languages; performance", subject = "{\bf B.5.2} Hardware, REGISTER-TRANSFER-LEVEL IMPLEMENTATION, Design Aids, Optimization. {\bf B.5.1} Hardware, REGISTER-TRANSFER-LEVEL IMPLEMENTATION, Design, Data-path design. {\bf B.7.1} Hardware, INTEGRATED CIRCUITS, Types and Design Styles, VLSI (very large scale integration). 
{\bf B.1.4} Hardware, CONTROL STRUCTURES AND MICROPROGRAMMING, Microprogram Design Aids, Languages and compilers.", } @Article{Zhang:1991:VNT, author = "Lixia Zhang", title = "{VirtualClock}: a New Traffic Control Algorithm for Packet-Switched Networks", journal = j-TOCS, volume = "9", number = "2", pages = "101--124", month = may, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-2/p101-zhang/", abstract = "One of the challenging research issues in building high-speed packet-switched networks is how to control the transmission rate of statistical data flows. This paper describes a new traffic control algorithm, {\em VirtualClock}, for high-speed network applications. VirtualClock monitors the average transmission rate of statistical data flows and provides every flow with guaranteed throughput and low queueing delay. It provides firewall protection among individual flows, as in a TDM system, while retaining the statistical multiplexing advantages of packet switching. Simulation results show that the VirtualClock algorithm meets all its design goals.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; performance", subject = "{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Packet-switching networks. {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol architecture. 
{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Performance attributes.", } @Article{Liskov:1991:EMO, author = "Barbara Liskov and Liuba Shrira and John Wroclawski", title = "Efficient At-Most-Once Messages Based on Synchronized Clocks", journal = j-TOCS, volume = "9", number = "2", pages = "125--142", month = may, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-2/p125-liskov/", abstract = "This paper describes a new at-most-once message passing protocol that provides guaranteed detection of duplicate messages even when the receiver has no state stored for the sender. It also discusses how to use at-most-once messages to implement higher-level primitives such as at-most-once remote procedure calls and sequenced bytestream protocols. Our performance measurements indicate that at-most-once RPCs can be provided at the same cost as less desirable forms of RPCs that do not guarantee at-most-once execution. Our method is based on the assumption that clocks throughout the system are loosely synchronized. Modern clock synchronization protocols provide good bounds on clock skew with high probability; our method depends on the bound for performance but not for correctness.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design", subject = "{\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol architecture. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Message sending. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Performance attributes.
{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Measurement techniques.", } @Article{Bihari:1991:DAR, author = "Thomas E. Bihari and Karsten Schwan", title = "Dynamic Adaptation of Real-Time Software", journal = j-TOCS, volume = "9", number = "2", pages = "143--174", month = may, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-2/p143-bihari/", abstract = "In large, dynamic, real-time computer systems, it is frequently most cost effective to employ different software performance and reliability techniques at different levels of granularity, at different times, or within different subsystems. These techniques may include regulation of redundancy and resource allocation, multiversion and multipath execution, adjustments of program attributes such as time-out periods and others. The management of software in such systems is a difficult task. Software that may be adapted to meet varying performance and reliability requirements offers a solution. A REal-time Software Adaptation System (RESAS) includes a uniform model of adaptable software and provides the tool necessary for programmers to implement algorithms that choose and enact adaptations in real time. 
RESAS has been implemented on a testbed consisting of a multiprocessor and an attached workstation, and adaptation algorithms have been developed that address the problem of adapting software to achieve two goals: software execution within specified time constraints and software resiliency with respect to computer hardware failures.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; performance; reliability", subject = "{\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Real-time systems and embedded systems. {\bf C.3} Computer Systems Organization, SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS, Real-time and embedded systems. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management.", } @Article{Bershad:1991:ULI, author = "Brian N. Bershad and Thomas E. Anderson and Edward D. Lazowska and Henry M. Levy", title = "User-level Interprocess Communication for Shared Memory Multiprocessors", journal = j-TOCS, volume = "9", number = "2", pages = "175--198", month = may, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-2/p175-bershad/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; performance", subject = "{\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Multiprocessing/multiprogramming/multitasking. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management. 
{\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors). {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Shared memory.", } @Article{Greenberg:1991:AUP, author = "Albert G. Greenberg and Boris D. Lubachevsky and Isi Mitrani", title = "Algorithms for Unboundedly Parallel Simulations", journal = j-TOCS, volume = "9", number = "3", pages = "201--221", month = aug, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-3/p201-greenberg/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; performance", subject = "{\bf I.6.8} Computing Methodologies, SIMULATION AND MODELING, Types of Simulation, Parallel. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors). {\bf F.1.2} Theory of Computation, COMPUTATION BY ABSTRACT DEVICES, Modes of Computation. 
{\bf I.6.8} Computing Methodologies, SIMULATION AND MODELING, Types of Simulation.", } @Article{Wang:1991:ETD, author = "Wen-Hann Wang and Jean-Loup Baer", title = "Efficient Trace-Driven Simulation Methods for Cache Performance Analysis", journal = j-TOCS, volume = "9", number = "3", pages = "222--241", month = aug, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-3/p222-wang/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; measurement; performance", subject = "{\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Simulation**. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**.", } @Article{Garcia-Molina:1991:ORM, author = "H{\'e}ctor Garc{\'\i}a-Molina and Annemarie Spauster", title = "Ordered and Reliable Multicast Communication", journal = j-TOCS, volume = "9", number = "3", pages = "242--271", month = aug, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-3/p242-garcia-molina/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; reliability", subject = "{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Network communications. 
{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design. {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems.", } @Article{Schiper:1991:LCA, author = "Andr{\'e} Schiper and Kenneth Birman and Pat Stephenson", title = "Lightweight Causal and Atomic Group Multicast", journal = j-TOCS, volume = "9", number = "3", pages = "272--314", month = aug, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-3/p272-schiper/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; reliability", subject = "{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Network communications. {\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design. {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design.", } @Article{Larowe:1991:ECM, author = "Richard P. 
{Larowe, Jr.} and Carla Schlatter Ellis", title = "Experimental Comparison of Memory Management Policies for {NUMA} Multiprocessors", journal = j-TOCS, volume = "9", number = "4", pages = "319--363", month = nov, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Oct 31 06:27:19 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-4/p319-larowe/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "experimentation; management; measurement; performance", subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Shared memory. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Multiple-instruction-stream, multiple-data-stream processors (MIMD). {\bf D.4.8} Software, OPERATING SYSTEMS, Performance.", } @Article{Karn:1991:IRT, author = "Phil Karn and Craig Partridge", title = "Improving Round-Trip Time Estimates in Reliable Transport Protocols", journal = j-TOCS, volume = "9", number = "4", pages = "364--373", month = nov, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-4/p364-karn/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; performance; reliability", subject = "{\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol verification. 
{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Packet-switching networks. {\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Store and forward networks. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Message sending. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Network communication.", } @Article{Kandlur:1991:RBA, author = "Dilip D. Kandlur and Kang G. Shin", title = "Reliable Broadcast Algorithms for {HARTS}", journal = j-TOCS, volume = "9", number = "4", pages = "374--398", month = nov, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-4/p374-kandlur/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; performance; reliability", subject = "{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, HARTS. {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols.", } @Article{Ahamad:1991:MV, author = "Mustaque Ahamad and Mostafa H. 
Ammar and Shun Yan Cheung", title = "Multidimensional Voting", journal = j-TOCS, volume = "9", number = "4", pages = "399--431", month = nov, year = "1991", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-4/p399-ahamad/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; reliability; theory", subject = "{\bf B.4.5} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS, Reliability, Testing, and Fault-Tolerance**, Redundant design**. {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.", } @Article{Kistler:1992:DOC, author = "James J. Kistler and M. Satyanarayanan", title = "Disconnected Operation in the {Coda File System}", journal = j-TOCS, volume = "10", number = "1", pages = "3--25", month = feb, year = "1992", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-1/p3-kistler/", abstract = "{\em Disconnected operation\/} is a mode of operation that enables a client to continue accessing critical data during temporary failures of a shared data repository. An important, though not exclusive, application of disconnected operation is in supporting portable computers. In this paper, we show that disconnected operation is feasible, efficient and usable by describing its design and implementation in the Coda File System. 
The central idea behind our work is that {\em caching of data}, now widely used for performance, can also be exploited to improve {\em availability.\/}", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; measurement; performance; reliability", subject = "{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements.", } @Article{Rosenblum:1992:DIL, author = "Mendel Rosenblum and John K. Ousterhout", title = "The Design and Implementation of a Log-Structured File System", journal = j-TOCS, volume = "10", number = "1", pages = "26--52", month = feb, year = "1992", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-1/p26-rosenblum/", abstract = "This paper presents a new technique for disk storage management called a {\em log-structured file system}. A log-structured file system writes all modifications to disk sequentially in a log-like structure, thereby speeding up both file writing and crash recovery. The log is the only structure on disk; it contains indexing information so that files can be read back from the log efficiently. In order to maintain large free areas on disk for fast writing, we divide the log into {\em segments\/} and use a {\em segment cleaner\/} to compress the live information from heavily fragmented segments. We present a series of simulations that demonstrate the efficiency of a simple cleaning policy based on cost and benefit.
We have implemented a prototype log-structured file system called Sprite LFS; it outperforms current Unix file systems by an order of magnitude for small-file writes while matching or exceeding Unix performance for reads and large writes. Even when the overhead for cleaning is included, Sprite LFS can use 70\% of the disk bandwidth for writing, whereas Unix file systems typically can use only 5--10\%.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; measurement; performance", subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Secondary storage. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Allocation/deallocation strategies. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Checkpoint/restart. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Simulation. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Operational analysis. {\bf H.2.2} Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart. {\bf H.3.2} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization.", } @Article{Anderson:1992:SAE, author = "Thomas E. Anderson and Brian N. Bershad and Edward D. Lazowska and Henry M. Levy", title = "Scheduler Activations: Effective Kernel Support for the User-Level Management of Parallelism", journal = j-TOCS, volume = "10", number = "1", pages = "53--79", month = feb, year = "1992", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-1/p53-anderson/", abstract = "{\em Threads\/} are the vehicle for concurrency in many approaches to parallel programming.
Threads can be supported either by the operating system kernel or by user-level library code in the application address space, but neither approach has been fully satisfactory. This paper addresses this dilemma. First, we argue that the performance of kernel threads is {\em inherently\/} worse than that of user-level threads, rather than this being an artifact of existing implementations; managing parallelism at the user level is essential to high-performance parallel computing. Next, we argue that the problems encountered in integrating user-level threads with other system services is a consequence of the lack of kernel support for user-level threads provided by contemporary multiprocessor operating systems; kernel threads are the {\em wrong abstraction\/} on which to support user-level management of parallelism. Finally, we describe the design, implementation, and performance of a new kernel interface and user-level thread package that together provide the same functionality as kernel threads without compromising the performance and flexibility advantages of user-level management of parallelism.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; measurement; performance", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Input/output. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance.", } @Article{Mogul:1992:NLS, author = "Jeffrey C. 
Mogul", title = "Network Locality at the Scale of Processes", journal = j-TOCS, volume = "10", number = "2", pages = "81--109", month = may, year = "1992", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-2/p81-mogul/", abstract = "Packets on a LAN can be viewed as a series of references to and from the objects they address. The amount of locality in this reference stream may be critical to the efficiency of network implementations, if the locality can be exploited through caching or scheduling mechanisms. Most previous studies have treated network locality with an addressing granularity of networks or individual hosts. This paper describes some experiments tracing locality at a finer grain, looking at references to individual processes, and with fine-grained time resolution. Observations of typical LANs show high per-process locality; that is, packets to a host usually arrive for the process that most recently sent a packet, and often with little intervening delay.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; measurement; performance", subject = "{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Packet-switching networks. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Measurement techniques. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Performance attributes. {\bf C.2.5} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Local and Wide-Area Networks. {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, TCP/IP.", } @Article{OMalley:1992:DNA, author = "Sean W. O'Malley and Larry L. 
Peterson", title = "A Dynamic Network Architecture", journal = j-TOCS, volume = "10", number = "2", pages = "110--143", month = may, year = "1992", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-2/p110-o_malley/", abstract = "Network software is a critical component of any distributed system. Because of its complexity, network software is commonly layered into a hierarchy of protocols, or more generally, into a {\em protocol graph}. Typical protocol graphs---including those standardized in the ISO and TCP/IP network architectures---share three important properties; the protocol graph is simple, the nodes of the graph (protocols) encapsulate complex functionality, and the topology of the graph is relatively static. This paper describes a new way to organize network software that differs from conventional architectures in all three of these properties. In our approach, the protocol graph is complex, individual protocols encapsulate a single function, and the topology of the graph is dynamic. The main contribution of this paper is to describe the ideas behind our new architecture, illustrate the advantages of using the architecture, and demonstrate that the architecture results in efficient network software.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; performance", subject = "{\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol architecture. {\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Network communications.", } @Article{Ramanathan:1992:DTC, author = "Parameswaran Ramanathan and Kang G. 
Shin", title = "Delivery of Time-Critical Messages using a Multiple Copy Approach", journal = j-TOCS, volume = "10", number = "2", pages = "144--166", month = may, year = "1992", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-2/p144-ramanathan/", abstract = "Reliable and timely delivery of messages between processing nodes is essential in distributed real-time systems. Failure to deliver a message within its deadline usually forces the system to undertake a recovery action, which introduces some cost (or overhead) to the system. This recovery cost can be very high, especially when the recovery action fails due to lack of time or resources. Proposed in this paper is a scheme to minimize the expected cost incurred as a result of messages failing to meet their deadlines. The scheme is intended for distributed real-time systems, especially with a point-to-point interconnection topology. The goal of minimizing the expected cost is achieved by sending multiple copies of a message through disjoint routes and thus increasing the probability of successful message delivery within the deadline. However, as the number of copies increases, the message traffic on the network increases, thereby increasing the delivery time for each of the copies. There is therefore a tradeoff between the number of copies of each message and the expected cost incurred as a result of messages missing their deadlines. The number of copies of each message to be sent is determined by optimizing this tradeoff. 
Simulation results for a hexagonal mesh and a hypercube topology indicate that the expected cost can be lowered substantially by the proposed scheme.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; measurement; performance; reliability", subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Reliability, availability, and serviceability. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf C.3} Computer Systems Organization, SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS, Real-time and embedded systems.", } @Article{Hsu:1992:ESN, author = "William Tsun-Yuk Hsu and Pen-Chung Yew", title = "An Effective Synchronization Network for Hot-Spot Accesses", journal = j-TOCS, volume = "10", number = "3", pages = "167--189", month = aug, year = "1992", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-3/p167-hsu/", abstract = "In large multiprocessor systems, fast synchronization is crucial for high performance. However, synchronization traffic tends to create ``hot-spots'' in shared memory and cause network congestion. Multistage shuffle-exchange networks have been proposed and built to handle synchronization traffic. Software combining schemes have also been proposed to relieve network congestion caused by hot-spots. However, multistage combining networks could be very expensive and software combining could be very slow. In this paper, we propose a single-stage combining network to handle synchronization traffic, which is separated from the regular memory traffic. 
A single-stage combining network has several advantages: (1) it is attractive from an implementation perspective because only one stage is needed (instead of log {\em N\/} stages); (2) only one network is needed to handle both forward and returning requests; (3) combined requests are distributed evenly through the network---the wait buffer size is reduced; and (4) fast-finishing algorithms [30] can be used to shorten the network delay. Because of all these advantages, we show that a single-stage combining network gives good performance at a lower cost than a multistage combining network.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; performance", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Synchronization. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Interconnection architectures. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems.", } @Article{Atkins:1992:ACC, author = "M. S. Atkins and M. Y. Coady", title = "Adaptable Concurrency Control for Atomic Data Types", journal = j-TOCS, volume = "10", number = "3", pages = "190--225", month = aug, year = "1992", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-3/p190-atkins/", abstract = "In many distributed systems concurrent access is required to a shared object, where abstract object servers may incorporate type-specific properties to define consistency requirements. Each operation and its outcome is treated as an event, and conflicts may occur between different event types.
Hence concurrency control and synchronization are required at the granularity of conflicting event types. With such a fine granularity of locking, the occurrence of conflicts is likely to be lower than with whole-object locking, so optimistic techniques become more attractive. This work describes the design, implementation, and performance of servers for a shared atomic object, a semiqueue, where each server employs either pessimistic or optimistic locking techniques on each conflicting event type. We compare the performance of a purely optimistic server, a purely pessimistic server, and a hybrid server which treats certain event types optimistically and others pessimistically, to demonstrate the most appropriate environment for using pessimistic, optimistic, or hybrid control. We show that the advantages of low overhead on optimistic locking at low conflict levels is offset at higher conflict levels by the wasted work done by aborted transactions. To achieve optimum performance over the whole range of conflict levels, an adaptable server is required, whereby the treatment of conflicting event types can be changed dynamically between optimistic and pessimistic, according to various criteria depending on the expected frequency of conflict. We describe our implementations of adaptable servers which may allocate concurrency control strategy on the basis of state information, the history of conflicts encountered, or by using preset transaction priorities. We show that the adaptable servers perform almost as well as the best of the purely optimistic, pessimistic, or hybrid servers under the whole range of conflict levels, showing the versatility and efficiency of the dynamic servers. 
Finally we outline a general design methodology for implementing adaptable concurrency control in servers for atomic objects, illustrated using an atomic shared B-tree.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; performance", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Distributed programming. {\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Abstract data types. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Deadlocks. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Mutual exclusion. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Synchronization. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Simulation. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Glasgow:1992:LRA, author = "Janice Glasgow and Glenn Macewen and Prakash Panangaden", title = "A Logic for Reasoning about Security", journal = j-TOCS, volume = "10", number = "3", pages = "226--264", month = aug, year = "1992", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-3/p226-glasgow/", abstract = "A formal framework called {\em Security Logic\/} ({\em SL\/}) is developed for specifying and reasoning about security policies and for verifying that system designs adhere to such policies. 
Included in this modal logic framework are definitions of {\em knowledge}, {\em permission}, and {\em obligation}. Permission is used to specify secrecy policies and obligation to specify integrity policies. The combination of policies is addressed and examples based on policies from the current literature are given.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "security; theory", subject = "{\bf F.4.1} Theory of Computation, MATHEMATICAL LOGIC AND FORMAL LANGUAGES, Mathematical Logic. {\bf H.2.0} Information Systems, DATABASE MANAGEMENT, General. {\bf K.6.5} Computing Milieux, MANAGEMENT OF COMPUTING AND INFORMATION SYSTEMS, Security and Protection. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection.", } @Article{Lampson:1992:ADS, author = "Butler Lampson and Mart{\'\i}n Abadi and Michael Burrows and Edward Wobber", title = "Authentication in Distributed Systems: Theory and Practice", journal = j-TOCS, volume = "10", number = "4", pages = "265--310", month = nov, year = "1992", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-4/p265-lampson/", abstract = "We describe a theory of authentication and a system that implements it. Our theory is based on the notion of principal and a ``speaks for'' relation between principals. A simple principal either has a name or is a communication channel; a compound principal can express an adopted role or delegated authority. The theory shows how to reason about a principal's authority by deducing the other principals that it can speak for; authenticating a channel is one important application. We use the theory to explain many existing and proposed security mechanisms. 
In particular, we describe the system we have built. It passes principals efficiently as arguments or results of remote procedure calls, and it handles public and shared key encryption, name lookup in a large name space, groups of principals, program loading, delegation, access control, and revocation.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "security; theory; verification", subject = "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Authentication. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Access controls. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Cryptographic controls. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf E.3} Data, DATA ENCRYPTION. {\bf K.6.5} Computing Milieux, MANAGEMENT OF COMPUTING AND INFORMATION SYSTEMS, Security and Protection, Authentication.", } @Article{Anderson:1992:FSC, author = "David P. Anderson and Yoshitomo Osawa and Ramesh Govindan", title = "A File System for Continuous Media", journal = j-TOCS, volume = "10", number = "4", pages = "311--337", month = nov, year = "1992", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-4/p311-anderson/", abstract = "The Continuous Media File System, CMFS, supports real-time storage and retrieval of continuous media data (digital audio and video) on disk. CMFS clients read or write files in ``sessions,'' each with a guaranteed minimum data rate. Multiple sessions, perhaps with different rates, and non-real-time access can proceed concurrently. 
CMFS addresses several interrelated design issues; real-time semantics for sessions, disk layout, an acceptance test for new sessions, and disk scheduling policy. We use simulation to compare different design choices.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; performance", subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, File organization. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Access methods. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Real-time systems and embedded systems. {\bf H.5.1} Information Systems, INFORMATION INTERFACES AND PRESENTATION, Multimedia Information Systems.", } @Article{Kessler:1992:PPA, author = "R. E. Kessler and Mark D. Hill", title = "Page Placement Algorithms for Large Real-Indexed Caches", journal = j-TOCS, volume = "10", number = "4", pages = "338--359", month = nov, year = "1992", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-4/p338-kessler/", abstract = "When a computer system supports both paged virtual memory and large real-indexed caches, cache performance depends in part on the main memory page placement. To date, most operating systems place pages by selecting an arbitrary page frame from a pool of page frames that have been made available by the page replacement algorithm. We give a simple model that shows that this naive (arbitrary) page placement leads to up to 30\% unnecessary cache conflicts. We develop several page placement algorithms, called {\em careful-mapping algorithms}, that try to select a page frame (from the pool of available page frames) that is likely to reduce cache contention. 
Using trace-driven simulation, we find that careful mapping results in 10--20\% fewer (dynamic) cache misses than naive mapping (for a direct-mapped real-indexed multimegabyte cache). Thus, our results suggest that careful mapping by the operating system can get about half the cache miss reduction that a cache size (or associativity) doubling can.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; measurement; performance", subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Allocation/deallocation strategies. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Virtual memory. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Simulation**. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management. {\bf E.2} Data, DATA STORAGE REPRESENTATIONS. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Main memory.", } @Article{Ladin:1992:PHA, author = "Rivka Ladin and Barbara Liskov and Liuba Shrira and Sanjay Ghemawat", title = "Providing High Availability Using Lazy Replication", journal = j-TOCS, volume = "10", number = "4", pages = "360--391", month = nov, year = "1992", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-4/p360-ladin/", abstract = "To provide high availability for services such as mail or bulletin boards, data must be replicated. One way to guarantee consistency of replicated data is to force service operations to occur in the same order at all sites, but this approach is expensive.
For some applications a weaker causal operation order can preserve consistency while providing better performance. This paper describes a new way of implementing causal operations. Our technique also supports two other kinds of operations: operations that are totally ordered with respect to one another and operations that are totally ordered with respect to all other operations. The method performs well in terms of response time, operation-processing capacity, amount of stored state, and number and size of messages; it does better than replication methods based on reliable multicast techniques.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; performance; reliability", subject = "{\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Reliability, availability, and serviceability. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf H.2.2} Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Distributed databases.", } @Article{Eager:1993:CER, author = "Derek L. 
Eager and John Zahorjan", title = "Chores: Enhanced Run-Time Support for Shared-Memory Parallel Computing", journal = j-TOCS, volume = "11", number = "1", pages = "1--32", month = feb, year = "1993", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-1/p1-eager/", abstract = "Parallel computing is increasingly important in the solution of large-scale numerical problems. The difficulty of efficiently hand-coding parallelism, and the limitations of parallelizing compilers, have nonetheless restricted its use by scientific programmers. In this paper we propose a new paradigm, {\em chores}, for the run-time support of parallel computing on shared-memory multiprocessors. We consider specifically uniform memory access shared-memory environments, although the chore paradigm should also be appropriate for use within the clusters of a large-scale nonuniform memory access machine. We argue that chore systems attain both the high efficiency of compiler approaches for the common case of data parallelism, and the flexibility and performance of user-level thread approaches for functional parallelism. These benefits are achieved within a single, simple conceptual model that almost entirely relieves the programmer and compiler from concerns of granularity, scheduling, and enforcement of synchronization constraints. Measurements of a prototype implementation demonstrate that the chore model can be supported more efficiently than can traditional approaches to either data or functional parallelism alone.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; measurement; performance", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management.
{\bf D.4.9} Software, OPERATING SYSTEMS, Systems Programs and Utilities. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf C.3} Computer Systems Organization, SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS.", } @Article{Gheith:1993:CKS, author = "Ahmed Gheith and Karsten Schwan", title = "{CHAOS$^{\rm arc}$}: Kernel Support for Multiweight Objects, Invocations, and Atomicity in Real-Time Multiprocessor Applications", journal = j-TOCS, volume = "11", number = "1", pages = "33--72", month = feb, year = "1993", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-1/p33-gheith/", abstract = "CHAOS$^{\rm arc}$ is an object-based multiprocessor operating system kernel that provides primitives with which programmers may easily construct objects of differing types and object invocations of differing semantics, targeting multiprocessor systems, and real-time applications. The CHAOS$^{\rm arc}$ can {\em guarantee\/} desired performance and functionality levels of selected computations in real-time applications. Such guarantees can be made despite possible uncertainty in execution environments by allowing programs to {\em adapt\/} in performance and functionality to varying operating conditions. This paper reviews the primitives offered by CHAOS$^{\rm arc}$ and demonstrates how the required elements of the CHAOS$^{\rm arc}$ real-time kernel are constructed with those primitives.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; measurement; performance", subject = "{\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Real-time systems and embedded systems.
{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management. {\bf J.7} Computer Applications, COMPUTERS IN OTHER SYSTEMS, Real time. {\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Concurrent programming structures. {\bf C.3} Computer Systems Organization, SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS, Real-time and embedded systems. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements.", } @Article{Kaashoek:1993:FIP, author = "M. Frans Kaashoek and Robbert van Renesse and Hans van Staveren and Andrew S. Tanenbaum", title = "{FLIP}: An Internetwork Protocol for Supporting Distributed Systems", journal = j-TOCS, volume = "11", number = "1", pages = "73--106", month = feb, year = "1993", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-1/p73-kaashoek/", abstract = "Most modern network protocols give adequate support for traditional applications such as file transfer and remote login. Distributed applications, however, have different requirements (e.g., efficient at-most-once remote procedure call even in the face of processor failures). Instead of using ad hoc protocols to meet each of the new requirements, we have designed a new protocol, called the Fast Local Internet Protocol (FLIP), that provides a clean and simple integrated approach to these new requirements. FLIP is an unreliable message protocol that provides both point-to-point communication and multicast communication, and requires almost no network management. Furthermore, by using FLIP we have simplified higher-level protocols such as remote procedure call and group communication, and enhanced support for process migration and security. 
A prototype implementation of FLIP has been built as part of the new kernel for the Amoeba distributed operating system, and is in daily use. Measurements of its performance are presented.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; management; measurement; performance", subject = "{\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf D.4.0} Software, OPERATING SYSTEMS, General, Amoeba. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management.", } @Article{Gopalakrishnan:1993:DVR, author = "Ganesh Gopalakrishnan and Richard Fujimoto", title = "Design and Verification of the {Rollback Chip} using {HOP}: a Case Study of Formal Methods Applied to Hardware Design", journal = j-TOCS, volume = "11", number = "2", pages = "109--145", month = may, year = "1993", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-2/p109-gopalakrishnan/", abstract = "The use of formal methods in hardware design improves the quality of designs in many ways: it promotes better understanding of the design; it permits systematic design refinement through the discovery of invariants; and it allows design verification (informal or formal). In this paper we illustrate the use of formal methods in the design of a custom hardware system called the ``Rollback Chip'' (RBC), conducted using a simple hardware design description language called ``HOP''. 
An informal specification of the requirements of the RBC is first given, followed by a {\em behavioral description\/} of the RBC stating its {\em desired behavior}. The behavioral description is refined into progressively more efficient designs, terminating in a {\em structural description}. Key refinement steps are based on system invariants that are discovered during the design, and proved correct during design verification. The first step in design verification is to apply a program called PARCOMP to {\em derive\/} a behavioral description from the structural description of the RBC. The derived behavior is then compared against the desired behavior using equational verification techniques. This work demonstrates that formal methods can be fruitfully applied to a nontrivial hardware design. It also illustrates the particular advantages of our approach based on HOP and PARCOMP. Last, but not the least, it formally verifies the RBC mechanism itself.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; languages; theory; verification", subject = "{\bf B.7.2} Hardware, INTEGRATED CIRCUITS, Design Aids, Verification. {\bf B.6.3} Hardware, LOGIC DESIGN, Design Aids, Hardware description languages. {\bf B.7.1} Hardware, INTEGRATED CIRCUITS, Types and Design Styles. 
{\bf B.7.2} Hardware, INTEGRATED CIRCUITS, Design Aids, Simulation.", } @Article{McCann:1993:DPA, author = "Cathy McCann and Raj Vaswani and John Zahorjan", title = "A Dynamic Processor Allocation Policy for Multiprogrammed Shared-Memory Multiprocessors", journal = j-TOCS, volume = "11", number = "2", pages = "146--178", month = may, year = "1993", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-2/p146-mccann/", abstract = "We propose and evaluate empirically the performance of a dynamic processor-scheduling policy for multiprogrammed shared-memory multiprocessors. The policy is dynamic in that it reallocates processors from one parallel job to another based on the currently realized parallelism of those jobs. The policy is suitable for implementation in production systems in that: ---It interacts well with very efficient user-level thread packages, leaving to them many low-level thread operations that do not require kernel intervention. ---It deals with thread blocking due to user I/O and page faults. ---It ensures fairness in delivering resources to jobs. ---Its performance, measured in terms of average job response time, is superior to that of previously proposed schedulers, including those implemented in existing systems. It provides good performance to very short, sequential (e.g., interactive) requests. We have evaluated our scheduler and compared it to alternatives using a set of prototype implementations running on a Sequent Symmetry multiprocessor. 
Using a number of parallel applications with distinct qualitative behaviors, we have both evaluated the policies according to the major criterion of overall performance and examined a number of more general policy issues, including the advantage of ``space sharing'' over ``time sharing'' the processors of a multiprocessor, and the importance of cooperation between the kernel and the application in reallocating processors between jobs. We have also compared the policies according to other criteria important in real implementations, in particular, fairness and response time to short, sequential requests. We conclude that a combination of performance and implementation considerations makes a compelling case for our dynamic scheduling policy.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; measurement; performance", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Multiprocessing/multiprogramming/multitasking. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors).", } @Article{Thekkath:1993:LLL, author = "Chandramohan A. Thekkath and Henry M. Levy", title = "Limits to Low-Latency Communication on High-Speed Networks", journal = j-TOCS, volume = "11", number = "2", pages = "179--203", month = may, year = "1993", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-2/p179-thekkath/", abstract = "The throughput of local area networks is rapidly increasing. For example, the bandwidth of new ATM networks and FDDI token rings is an order of magnitude greater than that of Ethernets.
Other network technologies promise a bandwidth increase of yet another order of magnitude in several years. However, in distributed systems, lowered latency rather than increased throughput is often of primary concern. This paper examines the system-level effects of newer high-speed network technologies on low-latency, cross-machine communications. To evaluate a number of influences, both hardware and software, we designed and implemented a new remote procedure call system targeted at providing low latency. We then ported this system to several hardware platforms (DECstation and SPARCstation) with several different networks and controllers (ATM, FDDI, and Ethernet). Comparing these systems allows us to explore the performance impact of alternative designs in the communication system with respect to achieving low latency, e.g., the network, the network controller, the host architecture and cache system, and the kernel and user-level runtime software. Our RPC system, which achieves substantially reduced call times (170 $\mu$seconds on an ATM network using DECstation 5000/200 hosts), allows us to isolate those components of next-generation networks and controllers that still stand in the way of low-latency communication. We demonstrate that new-generation processor technology and software design can reduce small-packet RPC times to near network-imposed limits, making network and controller design more crucial than ever to achieving truly low-latency communication.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; measurement; performance", subject = "{\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Network communication. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Message sending. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems.
{\bf B.4.2} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS, Input/Output Devices, Channels and controllers. {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol architecture. {\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems.", } @Article{Ammann:1993:DTG, author = "Paul Ammann and Sushil Jajodia", title = "Distributed Timestamp Generation in Planar Lattice Networks", journal = j-TOCS, volume = "11", number = "3", pages = "205--225", month = aug, year = "1993", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-3/p205-ammann/", abstract = "Timestamps are considered for distributed environments in which information flow is restricted to one direction through a planar lattice imposed on a network. For applications in such networks, existing timestamping algorithms require extension and modification. For example, in secure environments, typical timestamps provide a potential signaling channel between incomparable levels. In hierarchical databases, typical timestamps cause peripheral sites to unnecessarily affect the behavior at main sites. Algorithms are presented by which a network node may generate and compare timestamps using timestamp components maintained at dominated nodes in the network. The comparison relation is shown to be acyclic for timestamps produced by the generation algorithm. 
We discuss ways to safely relax the requirement that the network be a lattice. By example, we show how to modify a simple nonplanar lattice so that the generation algorithm can be applied. Uses of the timestamp generation algorithm in the motivating applications are outlined.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; security", subject = "{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf C.2.0} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, General, Security and protection (e.g., firewalls). {\bf G.2.m} Mathematics of Computing, DISCRETE MATHEMATICS, Miscellaneous. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Information flow controls. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Distributed databases.", } @Article{Anderson:1993:MCM, author = "David P. Anderson", title = "Metascheduling for Continuous Media", journal = j-TOCS, volume = "11", number = "3", pages = "226--252", month = aug, year = "1993", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-3/p226-anderson/", abstract = "Next-generation distributed systems will support {\em continuous media\/} (digital audio and video) in the same framework as other data. Many applications that use continuous media need guaranteed end-to-end performance (bounds on throughput and delay). To reliably support these requirements, system components such as CPU schedulers, networks, and file systems must offer performance guarantees. 
A {\em metascheduler\/} coordinates these components, negotiating end-to-end guarantees on behalf of clients. The {\em CM-resource model}, described in this paper, provides a basis for such a metascheduler. It defines a workload parameterization, an abstract interface to resources, and an algorithm for reserving multiple resources. The model uses an economic approach to dividing end-to-end delay, and it allows system components to ``work ahead,'' improving the performance of nonreal-time workload.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; economics; performance", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Performance attributes. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Buffering. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Network communication. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Real-time systems and embedded systems. {\bf H.5.1} Information Systems, INFORMATION INTERFACES AND PRESENTATION, Multimedia Information Systems, Audio input/output. 
{\bf H.5.1} Information Systems, INFORMATION INTERFACES AND PRESENTATION, Multimedia Information Systems, Video (e.g., tape, disk, DVI).", } @Article{Lim:1993:WAS, author = "Beng-Hong Lim and Anant Agarwal", title = "Waiting Algorithms for Synchronization in Large-Scale Multiprocessors", journal = j-TOCS, volume = "11", number = "3", pages = "253--294", month = aug, year = "1993", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-3/p253-lim/", abstract = "Through analysis and experiments, this paper investigates two-phase waiting algorithms to minimize the cost of waiting for synchronization in large-scale multiprocessors. In a two-phase algorithm, a thread first waits by polling a synchronization variable. If the cost of polling reaches a limit {\em Lpoll\/} and further waiting is necessary, the thread is blocked, incurring an additional fixed cost, {\em B}. The choice of {\em Lpoll\/} is a critical determinant of the performance of two-phase algorithms. We focus on methods for statically determining {\em Lpoll\/} because the run-time overhead of dynamically determining {\em Lpoll\/} can be comparable to the cost of blocking in large-scale multiprocessor systems with lightweight threads. Our experiments show that {\em always-block\/} ({\em Lpoll\/} = 0) is a good waiting algorithm with performance that is usually close to the best of the algorithms compared. We show that even better performance can be achieved with a static choice of {\em Lpoll\/} based on knowledge of likely wait-time distributions. 
Motivated by the observation that different synchronization types exhibit different wait-time distributions, we prove that a static choice of {\em Lpoll\/} can yield close to optimal on-line performance against an adversary that is restricted to choosing wait times from a fixed family of probability distributions. This result allows us to make an optimal static choice of {\em Lpoll\/} based on synchronization type. For exponentially distributed wait times, we prove that setting {\em Lpoll\/} $ = \ln (e - 1) $ {\em B\/} results in a waiting cost that is no more than {\em e/(e-1)\/} times the cost of an optimal off-line algorithm. For uniformly distributed wait times, we prove that setting {\em Lpoll\/} $ = \frac{1}{2} (\sqrt{5} - 1) $ {\em B\/} results in a waiting cost that is no more than $ (\sqrt{5} + 1) / 2 $ (the golden ratio) times the cost of an optimal off-line algorithm. Experimental measurements of several parallel applications on the Alewife multiprocessor simulator corroborate our theoretical findings.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; experimentation; performance; theory", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Synchronization. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Mutual exclusion. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Parallel processors**. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Stochastic analysis.", } @Article{Hill:1993:CSM, author = "Mark D. Hill and James R. Larus and Steven K. Reinhardt and David A.
Wood", title = "Cooperative Shared Memory: Software and Hardware for Scalable Multiprocessors", journal = j-TOCS, volume = "11", number = "4", pages = "300--318", month = nov, year = "1993", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-4/p300-hill/", abstract = "We believe the paucity of massively parallel, shared-memory machines follows from the lack of a shared-memory programming performance model that can inform programmers of the cost of operations (so they can avoid expensive ones) and can tell hardware designers which cases are common (so they can build simple hardware to optimize them). Cooperative shared memory, our approach to shared-memory design, addresses this problem. Our initial implementation of cooperative shared memory uses a simple programming model, called Check-In/Check-Out (CICO), in conjunction with even simpler hardware, called Dir1SW. In CICO, programs bracket uses of shared data with a check\_out directive marking the expected first use and a check\_in directive terminating the expected use of the data. A cooperative prefetch directive helps hide communication latency. Dir1SW is a minimal directory protocol that adds little complexity to message-passing hardware, but efficiently supports programs written within the CICO model.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; measurement; performance", subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Shared memory. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Simulation**. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors).
{\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Parallel processors**. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Design studies. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Modeling techniques. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Parallel programming.", } @Article{Anderson:1993:HSS, author = "Thomas E. Anderson and Susan S. Owicki and James B. Saxe and Charles P. Thacker", title = "High-speed Switch Scheduling for Local-Area Networks", journal = j-TOCS, volume = "11", number = "4", pages = "319--352", month = nov, year = "1993", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-4/p319-anderson/", abstract = "Current technology trends make it possible to build communication networks that can support high-performance distributed computing. This paper describes issues in the design of a prototype switch for an arbitrary topology point-to-point network with link speeds of up to 1 Gbit/s. The switch deals in fixed-length ATM-style cells, which it can process at a rate of 37 million cells per second. It provides high bandwidth and low latency for datagram traffic. In addition, it supports real-time traffic by providing bandwidth reservations with guaranteed latency bounds. The key to the switch's operation is a technique called {\em parallel iterative matching}, which can quickly identify a set of conflict-free cells for transmission in a time slot. Bandwidth reservations are accommodated in the switch by building a fixed schedule for transporting cells from reserved flows across the switch; parallel iterative matching can fill unused slots with datagram traffic. 
Finally, we note that parallel iterative matching may not allocate bandwidth fairly among flows of datagram traffic. We describe a technique called {\em statistical matching}, which can be used to ensure fairness at the switch and to support applications with rapidly changing needs for guaranteed bandwidth.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; experimentation; performance", subject = "{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Network communications. {\bf C.2.5} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Local and Wide-Area Networks, Access schemes. {\bf G.2.2} Mathematics of Computing, DISCRETE MATHEMATICS, Graph Theory, Graph algorithms.", } @Article{Li:1993:ANL, author = "Wei Li and Keshav Pingali", title = "Access Normalization: Loop Restructuring for {NUMA} Computers", journal = j-TOCS, volume = "11", number = "4", pages = "353--375", month = nov, year = "1993", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-4/p353-li/", abstract = "In scalable parallel machines, processors can make local memory accesses much faster than they can make remote memory accesses. Additionally, when a number of remote accesses must be made, it is usually more efficient to use block transfers of data rather than to use many small messages. To run well on such machines, software must exploit these features. We believe it is too onerous for a programmer to do this by hand, so we have been exploring the use of restructuring compiler technology for this purpose. 
In this article, we start with a language like HPF-Fortran with user-specified data distribution and develop a systematic loop transformation strategy called {\em access normalization\/} that restructures loop nests to exploit locality and block transfers. We demonstrate the power of our techniques using routines from the BLAS (Basic Linear Algebra Subprograms) library. An important feature of our approach is that we model loop transformation using {\em invertible\/} matrices and integer lattice theory.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; experimentation; languages; performance", subject = "{\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Multiple-instruction-stream, multiple-data-stream processors (MIMD). {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Parallel programming. {\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors, Compilers. {\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors, Optimization. {\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors, Code generation.", } @Article{Mahlke:1993:SSM, author = "Scott A. Mahlke and William Y. Chen and Roger A. Bringmann and Richard E. Hank and Wen-Mei W. Hwu and B. Ramakrishna Rau and Michael S. Schlansker", title = "Sentinel Scheduling: a Model for Compiler-Controlled Speculative Execution", journal = j-TOCS, volume = "11", number = "4", pages = "376--408", month = nov, year = "1993", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-4/p376-mahlke/", abstract = "Speculative execution is an important source of parallelism for VLIW and superscalar processors. 
A serious challenge with compiler-controlled speculative execution is to efficiently handle exceptions for speculative instructions. In this article, a set of architectural features and compile-time scheduling support collectively referred to as {\em sentinel scheduling\/} is introduced. Sentinel scheduling provides an effective framework for both compiler-controlled speculative execution and exception handling. All program exceptions are accurately detected and reported in a timely manner with sentinel scheduling. Recovery from exceptions is also ensured with the model. Experimental results show the effectiveness of sentinel scheduling for exploiting instruction-level parallelism and overhead associated with exception handling.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; performance", subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Associative memories. {\bf C.0} Computer Systems Organization, GENERAL, Hardware/software interfaces. {\bf C.0} Computer Systems Organization, GENERAL, Instruction set design. {\bf C.0} Computer Systems Organization, GENERAL, System architectures. {\bf C.1.1} Computer Systems Organization, PROCESSOR ARCHITECTURES, Single Data Stream Architectures, Pipeline processors**. {\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing and Debugging, Error handling and recovery. {\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors, Code generation. {\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors, Compilers. 
{\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors, Optimization.", } @Article{Wobber:1994:ATO, author = "Edward Wobber and Mart{\'\i}n Abadi and Michael Burrows and Butler Lampson", title = "Authentication in the {Taos} Operating System", journal = j-TOCS, volume = "12", number = "1", pages = "3--32", month = feb, year = "1994", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-1/p3-wobber/", abstract = "We describe a design for security in a distributed system and its implementation. In our design, applications gain access to security services through a narrow interface. This interface provides a notion of identity that includes simple principals, groups, roles, and delegations. A new operating system component manages principals, credentials, and secure channels. It checks credentials according to the formal rules of a logic of authentication. Our implementation is efficient enough to support a substantial user community.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; security; theory", subject = "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Authentication. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Access controls.", } @Article{Satyanarayanan:1994:LRV, author = "M. Satyanarayanan and Henry H. Mashburn and Puneet Kumar and David C. Steere and James J. 
Kistler", title = "Lightweight Recoverable Virtual Memory", journal = j-TOCS, volume = "12", number = "1", pages = "33--57", month = feb, year = "1994", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-1/p33-satyanarayanan/", abstract = "{\em Recoverable virtual memory\/} refers to regions of a virtual address space on which transactional guarantees are offered. This article describes RVM, an efficient, portable, and easily used implementation of recoverable virtual memory for Unix environments. A unique characteristic of RVM is that it allows independent control over the transactional properties of atomicity, permanence, and serializability. This leads to considerable flexibility in the use of RVM, potentially enlarging the range of applications that can benefit from transactions. It also simplifies the layering of functionality such as nesting and distribution. The article shows that RVM performs well over its intended range of usage even though it does not benefit from specialized operating system support. It also demonstrates the importance of intra- and inter-transaction optimizations.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; measurement; performance; reliability", subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Virtual memory. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf H.2.2} Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Heidemann:1994:FSD, author = "John S.
Heidemann and Gerald J. Popek", title = "File-system Development with Stackable Layers", journal = j-TOCS, volume = "12", number = "1", pages = "58--89", month = feb, year = "1994", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-1/p58-heidemann/", abstract = "Filing services have experienced a number of innovations in recent years, but many of these promising ideas have failed to enter into broad use. One reason is that current filing environments present several barriers to new development. For example, file systems today typically stand alone instead of building on the work of others, and support of new filing services often requires changes that invalidate existing work. Stackable file-system design addresses these issues in several ways. Complex filing services are constructed from layer ``building blocks,'' each of which may be provided by independent parties. There are no syntactic constraints to layer order, and layers can occupy different address spaces, allowing very flexible layer configuration. Independent layer evolution and development are supported by an extensible interface bounding each layer. This paper discusses stackable layering in detail and presents design techniques it enables. We describe an implementation providing these facilities that exhibits very high performance. By lowering barriers to new filing design, stackable layering offers the potential of broad third-party file-system development not feasible today.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; performance", subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Maintenance**. 
{\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Hierarchical design**. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements.", } @Article{Attiya:1994:SCV, author = "Hagit Attiya and Jennifer L. Welch", title = "Sequential Consistency versus Linearizability", journal = j-TOCS, volume = "12", number = "2", pages = "91--122", month = may, year = "1994", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-2/p91-attiya/", abstract = "The power of two well-known consistency conditions for shared-memory multiprocessors, {\em sequential consistency\/} and {\em linearizability}, is compared. The cost measure studied is the worst-case response time in distributed implementations of virtual shared memory supporting one of the two conditions. Three types of shared-memory objects are considered: read/write objects, FIFO queues, and stacks. If clocks are only approximately synchronized (or do not exist), then for all three object types it is shown that linearizability is more expensive than sequential consistency. We show that, for all three data types, the worst-case response time is very sensitive to the assumptions that are made about the timing information available to the system. Under the strong assumption that processes have perfectly synchronized clocks, it is shown that sequential consistency and linearizability are equally costly. We present upper bounds for linearizability and matching lower bounds for sequential consistency. The upper bounds are shown by presenting algorithms that use atomic broadcast in a modular fashion. 
The lower-bound proofs for the approximate case use the technique of ``shifting,'' first introduced for studying the clock synchronization problem.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; performance", subject = "{\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Distributed programming. {\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Concurrent programming structures. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Distributed memories. {\bf F.1.2} Theory of Computation, COMPUTATION BY ABSTRACT DEVICES, Modes of Computation, Parallelism and concurrency. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Distributed databases. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Concurrency.", } @Article{Mann:1994:CDF, author = "Timothy Mann and Andrew Birrell and Andy Hisgen and Charles Jerian and Garret Swart", title = "A Coherent Distributed File Cache with Directory Write-Behind", journal = j-TOCS, volume = "12", number = "2", pages = "123--164", month = may, year = "1994", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-2/p123-mann/", abstract = "Extensive caching is a key feature of the Echo distributed file system. Echo client machines maintain coherent caches of file and directory data and properties, with write-behind (delayed write-back) of {\em all\/} cached information. Echo specifies ordering constraints on this write-behind, enabling applications to store and maintain consistent data structures in the file system even when crashes or network faults prevent some writes from being completed. 
In this paper we describe the Echo cache's coherence and ordering semantics, show how they can improve the performance and consistency of applications, and explain how they are implemented. We also discuss the general problem of reliably notifying applications and users when write-behind is lost; we addressed this problem as part of the Echo design, but did not find a fully satisfactory solution.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; measurement; performance; reliability; security", subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems.", } @Article{Uhlig:1994:DTS, author = "Richard Uhlig and David Nagle and Tim Stanley and Trevor Mudge and Stuart Sechrest and Richard Brown", title = "Design Tradeoffs for Software-Managed {TLBs}", journal = j-TOCS, volume = "12", number = "3", pages = "175--205", month = aug, year = "1994", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-3/p175-uhlig/", abstract = "An increasing number of architectures provide virtual memory support through software-managed TLBs. However, software management can impose considerable penalties that are highly dependent on the operating system's structure and its use of virtual memory. This work explores software-managed TLB design tradeoffs and their interaction with a range of monolithic and microkernel operating systems.
Through hardware monitoring and simulation, we explore TLB performance for benchmarks running on a MIPS R2000-based workstation running Ultrix, OSF/1, and three versions of Mach 3.0.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; performance", subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Virtual memory. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Measurement techniques. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Associative memories. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Virtual memory. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Simulation**. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements.", } @Article{Stodolsky:1994:PLD, author = "Daniel Stodolsky and Mark Holland and William V. {Courtright II} and Garth A. Gibson", title = "Parity Logging Disk Arrays", journal = j-TOCS, volume = "12", number = "3", pages = "206--235", month = aug, year = "1994", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-3/p206-stodolsky/", abstract = "Parity-encoded redundant disk arrays provide highly reliable, cost-effective secondary storage with high performance for reads and large writes. Their performance on small writes, however, is much worse than mirrored disks---the traditional, highly reliable, but expensive organization for secondary storage. Unfortunately, small writes are a substantial portion of the I/O workload of many important, demanding applications such as on-line transaction processing. 
This paper presents {\em parity logging}, a novel solution to the small-write problem for redundant disk arrays. Parity logging applies journalling techniques to reduce substantially the cost of small writes. We provide detailed models of parity logging and competing schemes---mirroring, floating storage, and RAID level 5---and verify these models by simulation. Parity logging provides performance competitive with mirroring, but with capacity overhead close to the minimum offered by RAID level 5. Finally, parity logging can exploit data caching more effectively than all three alternative approaches.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; performance; reliability", subject = "{\bf B.4.2} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS, Input/Output Devices, Channels and controllers. {\bf B.4.5} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS, Reliability, Testing, and Fault-Tolerance**, Redundant design**.", } @Article{Cao:1994:TPR, author = "Pei Cao and Swee Boon Lin and Shivakumar Venkataraman and John Wilkes", title = "The {TickerTAIP} Parallel {RAID} Architecture", journal = j-TOCS, volume = "12", number = "3", pages = "236--269", month = aug, year = "1994", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-3/p236-cao/", abstract = "Traditional disk arrays have a centralized architecture, with a single controller through which all requests flow. Such a controller is a single point of failure, and its performance limits the maximum number of disks to which the array can scale. We describe TickerTAIP, a parallel architecture for disk arrays that distributes the controller functions across several loosely coupled processors. 
The result is better scalability, fault tolerance, and flexibility. This article presents the TickerTAIP architecture and an evaluation of its behavior. We demonstrate the feasibility by a working example, describe a family of distributed algorithms for calculating RAID parity, discuss techniques for establishing request atomicity, sequencing, and recovery, and evaluate the performance of the TickerTAIP design in both absolute terms and by comparison to a centralized RAID implementation. We also analyze the effects of including disk-level request-scheduling algorithms inside the array. We conclude that the TickerTAIP architectural approach is feasible, useful, and effective.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; performance; reliability", subject = "{\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf B.4.2} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS, Input/Output Devices, Channels and controllers. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Parallel programming. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Secondary storage. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems.", } @Article{Chase:1994:SPS, author = "Jeffrey S. Chase and Henry M. Levy and Michael J. Feeley and Edward D.
Lazowska", title = "Sharing and Protection in a Single-Address-Space Operating System", journal = j-TOCS, volume = "12", number = "4", pages = "271--307", month = nov, year = "1994", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-4/p271-chase/", abstract = "This article explores memory sharing and protection support in Opal, a single-address-space operating system designed for wide-address (64-bit) architectures. Opal threads execute within protection domains in a single shared virtual address space. Sharing is simplified, because addresses are context independent. There is no loss of protection, because addressability and access are independent; the right to access a segment is determined by the protection domain in which a thread executes. This model enables beneficial code- and data-sharing patterns that are currently prohibitive, due in part to the inherent restrictions of multiple address spaces, and in part to Unix programming style. We have designed and implemented an Opal prototype using the Mach 3.0 microkernel as a base. Our implementation demonstrates how a single-address-space structure can be supported alongside of other environments on a modern microkernel operating system, using modern wide-address architectures. This article justifies the Opal model and its goals for sharing and protection, presents the system and its abstractions, describes the prototype implementation, and reports experience with integrated applications.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; measurement; performance", subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management.
{\bf C.1.3} Computer Systems Organization, PROCESSOR ARCHITECTURES, Other Architecture Styles, Capability architectures**. {\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Modules, packages. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Access controls. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Information flow controls. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf E.1} Data, DATA STRUCTURES. {\bf E.2} Data, DATA STORAGE REPRESENTATIONS.", } @Article{Chen:1994:NAP, author = "Peter M. Chen and David A. Patterson", title = "A New Approach to {I/O} Performance Evaluation: Self-Scaling {I/O} Benchmarks, Predicted {I/O} Performance", journal = j-TOCS, volume = "12", number = "4", pages = "308--339", month = nov, year = "1994", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-4/p308-chen/", abstract = "Current I/O benchmarks suffer from several chronic problems: they quickly become obsolete; they do not stress the I/O system; and they do not help much in understanding I/O system performance. We propose a new approach to I/O performance analysis. First, we propose a self-scaling benchmark that dynamically adjusts aspects of its workload according to the performance characteristic of the system being measured. By doing so, the benchmark automatically scales across current and future systems. The evaluation aids in understanding system performance by reporting how performance varies according to each of five workload parameters. 
Second, we propose predicted performance, a technique for using the results from the self-scaling evaluation to estimate quickly the performance for workloads that have not been measured. We show that this technique yields reasonably accurate performance estimates and argue that this method gives a far more accurate comparative performance evaluation than traditional single-point benchmarks. We apply our new evaluation technique by measuring a SPARCstation 1+ with one SCSI disk, an HP 730 with one SCSI-II disk, a DECstation 5000/200 running the Sprite LFS operating system with a three-disk disk array, a Convex C240 minisupercomputer with a four-disk disk array, and a Solbourne 5E/905 fileserver with a two-disk disk array.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "performance", subject = "{\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf K.6.2} Computing Milieux, MANAGEMENT OF COMPUTING AND INFORMATION SYSTEMS, Installation Management, Benchmarks. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS. {\bf D.2.8} Software, SOFTWARE ENGINEERING, Metrics, Performance measures. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Input/output.", } @Article{Reiter:1994:SAF, author = "Michael K. Reiter and Kenneth P. Birman and Robbert van Renesse", title = "A Security Architecture for Fault-Tolerant Systems", journal = j-TOCS, volume = "12", number = "4", pages = "340--371", month = nov, year = "1994", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-4/p340-reiter/", abstract = "Process groups are a common abstraction for fault-tolerant computing in distributed systems. 
We present a security architecture that extends the process group into a security abstraction. Integral parts of this architecture are services that securely and fault tolerantly support cryptographic key distribution. Using replication only when necessary, and introducing novel replication techniques when it was necessary, we have constructed these services both to be easily defensible against attack and to permit key distribution despite the transient unavailability of a substantial number of servers. We detail the design and implementation of these services and the secure process group abstraction they support. We also give preliminary performance figures for some common group operations.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "reliability; security", subject = "{\bf C.2.0} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, General, Security and protection (e.g., firewalls). {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Authentication. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Cryptographic controls. {\bf K.6.5} Computing Milieux, MANAGEMENT OF COMPUTING AND INFORMATION SYSTEMS, Security and Protection, Authentication. {\bf E.3} Data, DATA ENCRYPTION.", } @Article{Bates:1995:DHD, author = "Peter C. 
Bates", title = "Debugging Heterogeneous Distributed Systems Using Event-Based Models of Behavior", journal = j-TOCS, volume = "13", number = "1", pages = "1--31", month = feb, year = "1995", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-1/p1-bates/", abstract = "We describe a high-level debugging approach, Event-Based Behavioral Abstraction (EBBA), in which debugging is treated as a process of creating models of expected program behaviors and comparing these to the actual behaviors exhibited by the program. The use of EBBA techniques can enhance debugging-tool transparency, reduce latency and uncertainty for fundamental debugging activities, and accommodate diverse, heterogeneous architectures. Using events and behavior models as a basic mechanism provides a uniform view of heterogeneous systems and enables analysis to be performed in well-defined ways. Their use also enables EBBA users to extend and reuse knowledge gained in solving previous problems to new situations. We describe our behavior-modeling algorithm that matches actual behavior to models and automates many behavior analysis steps. The algorithm matches behavior in as many ways as possible and resolves these to return the best match to the user. It deals readily with partial behavior matches and incomplete information. In particular, we describe a tool set we have built. The tool set has been used to investigate the behavior of a wide range of programs. 
The tools are modular and can be distributed readily throughout a system.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; reliability", subject = "{\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing and Debugging, Debugging aids. {\bf C.2.3} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Operations, Network monitoring. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf D.2.2} Software, SOFTWARE ENGINEERING, Design Tools and Techniques, Programmer workbench**. {\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing and Debugging, Monitors. {\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing and Debugging, Tracing.", } @Article{Sugumar:1995:SAC, author = "Rabin A. Sugumar and Santosh G. Abraham", title = "Set-Associative Cache Simulation Using Generalized Binomial Trees", journal = j-TOCS, volume = "13", number = "1", pages = "32--56", month = feb, year = "1995", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-1/p32-sugumar/", abstract = "Set-associative caches are widely used in CPU memory hierarchies, I/O subsystems, and file systems to reduce average access times. This article proposes an efficient simulation technique for simulating a group of set-associative caches in a single pass through the address trace, where all caches have the same line size but varying associativities and varying number of sets. The article also introduces a generalization of the ordinary binomial tree and presents a representation of caches in this class using the Generalized Binomial Tree (gbt). 
The tree representation permits efficient search and update of the caches. Theoretically, the new algorithm, GBF\_LS, based on the gbt structure, always takes fewer comparisons than the two earlier algorithms for the same class of caches: all-associativity and generalized forest simulation. Experimentally, the new algorithm shows performance gains in the range of 1.2 to 3.8 over the earlier algorithms on address traces of the SPEC benchmarks. A related algorithm for simulating multiple alternative direct-mapped caches with fixed cache size, but varying line size, is also presented.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; measurement; performance", subject = "{\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Simulation**. {\bf E.1} Data, DATA STRUCTURES, Trees. {\bf I.6.8} Computing Methodologies, SIMULATION AND MODELING, Types of Simulation. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories.", } @Article{Tullsen:1995:ECP, author = "Dean M. Tullsen and Susan J. Eggers", title = "Effective Cache Prefetching on Bus-Based Multiprocessors", journal = j-TOCS, volume = "13", number = "1", pages = "57--88", month = feb, year = "1995", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-1/p57-tullsen/", abstract = "Compiler-directed cache prefetching has the potential to hide much of the high memory latency seen by current and future high-performance processors. However, prefetching is not without costs, particularly on a shared-memory multiprocessor. Prefetching can negatively affect bus utilization, overall cache miss rates, memory latencies and data sharing. 
We simulate the effects of a compiler-directed prefetching algorithm, running on a range of bus-based multiprocessors. We show that, despite a high memory latency, this architecture does not necessarily support prefetching well, in some cases actually causing performance degradations. We pinpoint several problems with prefetching on a shared-memory architecture (additional conflict misses, no reduction in the data-sharing traffic and associated latencies, a multiprocessor's greater sensitivity to memory utilization and the sensitivity of the cache hit rate to prefetch distance) and measure their effect on performance. We then solve those problems through architectural techniques and heuristics for prefetching that could be easily incorporated into a compiler: (1) victim caching, which eliminates most of the cache conflict misses caused by prefetching in a direct-mapped cache, (2) special prefetch algorithms for shared data, which significantly improve the ability of our basic prefetching algorithm to prefetch individual misses, and (3) compiler-based shared-data restructuring, which eliminates many of the invalidation misses the basic prefetching algorithm does not predict. The combined effect of these improvements is to make prefetching effective over a much wider range of memory architectures.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; performance", subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Shared memory. 
{\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors).", } @Article{Akyurek:1995:ABR, author = "Sedat Aky{\"u}rek and Kenneth Salem", title = "Adaptive Block Rearrangement", journal = j-TOCS, volume = "13", number = "2", pages = "89--121", month = may, year = "1995", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-2/p89-akyurek/", abstract = "An adaptive technique for reducing disk seek times is described. The technique copies frequently referenced blocks from their original locations to reserved space near the middle of the disk. Reference frequencies need not be known in advance. Instead, they are estimated by monitoring the stream of arriving requests. Trace-driven simulations show that seek times can be cut substantially by copying only a small number of blocks using this technique. The technique has been implemented by modifying a UNIX device driver. No modifications are required to the file system that uses the driver.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; experimentation; performance", subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf H.3.2} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Modeling and prediction. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Simulation. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance.", } @Article{Hosseini-Khayat:1995:SEB, author = "Saied Hosseini-Khayat and Andreas D. 
Bovopoulos", title = "A Simple and Efficient Bus Management Scheme That Supports Continuous Streams", journal = j-TOCS, volume = "13", number = "2", pages = "122--140", month = may, year = "1995", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-2/p122-hosseini-khayat/", abstract = "An efficient bandwidth management and access arbitration scheme for an I/O bus in a multimedia workstation is presented. It assumes that a multimedia workstation consists of a number of processing modules which are interconnected by a packet bus. The scheme is efficient in the sense that it allows the bus to support both continuous media transfers and regular random transactions in such a way that continuous streams can meet their real-time constraints independently of random traffic, and random traffic is not delayed significantly by continuous traffic except when traffic load is very high. Implementation guidelines are provided to show that the scheme is practical. Finally, the performance of this scheme is compared with alternative solutions through simulation.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "experimentation; performance", subject = "{\bf B.4.3} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS, Interconnections (Subsystems), Topology. {\bf B.4.4} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS, Performance Analysis and Design Aids**, Simulation**. {\bf H.5.1} Information Systems, INFORMATION INTERFACES AND PRESENTATION, Multimedia Information Systems. {\bf C.0} Computer Systems Organization, GENERAL, System architectures.", } @Article{Singh:1995:IHB, author = "Jaswinder Pal Singh and John L. 
Hennessy and Anoop Gupta", title = "Implications of Hierarchical {$N$}-Body Methods for Multiprocessor Architectures", journal = j-TOCS, volume = "13", number = "2", pages = "141--202", month = may, year = "1995", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-2/p141-singh/", abstract = "To design effective large-scale multiprocessors, designers need to understand the characteristics of the applications that will use the machines. Application characteristics of particular interest include the amount of communication relative to computation, the structure of the communication, and the local cache and memory requirements, as well as how these characteristics scale with larger problems and machines. One important class of applications is based on hierarchical N-body methods, which are used to solve a wide range of scientific and engineering problems efficiently. Important characteristics of these methods include the nonuniform and dynamically changing nature of the domains to which they are applied, and their use of long-range, irregular communication. This article examines the key architectural implications of representative applications that use the two dominant hierarchical N-body methods: the Barnes--Hut Method and the Fast Multipole Method. We first show that exploiting temporal locality on accesses to communicated data is critical to obtaining good performance on these applications and then argue that coherent caches on shared-address-space machines exploit this locality both automatically and very effectively. Next, we examine the implications of scaling the applications to run on larger machines. 
We use scaling methods that reflect the concerns of the application scientist and find that this leads to different conclusions about how communication traffic and local cache and memory usage scale than scaling based only on data set size. In particular, we show that under the most realistic form of scaling, both the communication-to-computation ratio as well as the working-set size (and hence the ideal cache size per processor) grow slowly as larger problems are run on larger machines. Finally, we examine the effects of using the two dominant abstractions for interprocessor communication: a shared address space and explicit message passing between private address spaces. We show that the lack of an efficiently supported shared address space will substantially increase the programming complexity and performance overheads for these applications.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; experimentation; measurement; performance", subject = "{\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors). {\bf C.0} Computer Systems Organization, GENERAL, System architectures. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS. {\bf C.5.1} Computer Systems Organization, COMPUTER SYSTEM IMPLEMENTATION, Large and Medium (``Mainframe'') Computers.", } @Article{Carter:1995:TRC, author = "John B. Carter and John K. 
Bennett and Willy Zwaenepoel", title = "Techniques for Reducing Consistency-Related Communication in Distributed Shared-Memory Systems", journal = j-TOCS, volume = "13", number = "3", pages = "205--243", month = aug, year = "1995", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-3/p205-carter/", abstract = "Distributed shared memory (DSM) is an abstraction of shared memory on a distributed-memory machine. Hardware DSM systems support this abstraction at the architecture level; software DSM systems support the abstraction within the runtime system. One of the key problems in building an efficient software DSM system is to reduce the amount of communication needed to keep the distributed memories consistent. In this article we present four techniques for doing so: software release consistency; multiple consistency protocols; write-shared protocols; and an update-with-timeout mechanism. These techniques have been implemented in the Munin DSM system. We compare the performance of seven Munin application programs: first to their performance when implemented using message passing, and then to their performance when running on a conventional software DSM system that does not embody the preceding techniques. On a 16-processor cluster of workstations, Munin's performance is within 5\% of message passing for four out of the seven applications. For the other three, performance is within 29 to 33\%. Detailed analysis of two of these three applications indicates that the addition of a function-shipping capability would bring their performance to within 7\% of the message-passing performance. 
Compared to a conventional DSM system, Munin achieves performance improvements ranging from a few to several hundred percent, depending on the application.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; performance", subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Distributed memories. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Interconnection architectures. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Network communication. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Shared memory. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Virtual memory. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Virtual memory.", } @Article{Diwan:1995:MSP, author = "Amer Diwan and David Tarditi and Eliot Moss", title = "Memory System Performance of Programs with Intensive Heap Allocation", journal = j-TOCS, volume = "13", number = "3", pages = "244--273", month = aug, year = "1995", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-3/p244-diwan/", abstract = "Heap allocation with copying garbage collection is a general storage management technique for programming languages. It is believed to have poor memory system performance. To investigate this, we conducted an in-depth study of the memory system performance of heap allocation for memory systems found on many machines. 
We studied the performance of mostly functional Standard ML programs which made heavy use of heap allocation. We found that most machines support heap allocation poorly. However, with the appropriate memory system organization, heap allocation can have good performance. The memory system property crucial for achieving good performance was the ability to allocate and initialize a new object into the cache without a penalty. This can be achieved by having subblock placement with a subblock size of one word with a write-allocate policy, along with fast page-mode writes or a write buffer. For caches with subblock placement, the data cache overhead was under 9\% for a 64K or larger data cache; without subblock placement the overhead was often higher than 50\%.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "experimentation; languages; measurement; performance", subject = "{\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Dynamic storage management. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Associative memories. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Simulation**. {\bf D.1.1} Software, PROGRAMMING TECHNIQUES, Applicative (Functional) Programming. {\bf D.3.2} Software, PROGRAMMING LANGUAGES, Language Classifications. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS.", } @Article{Hartman:1995:ZSN, author = "John H. Hartman and John K.
Ousterhout", title = "The {Zebra} Striped Network File System", journal = j-TOCS, volume = "13", number = "3", pages = "274--310", month = aug, year = "1995", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-3/p274-hartman/", abstract = "Zebra is a network file system that increases throughput by striping the file data across multiple servers. Rather than striping each file separately, Zebra forms all the new data from each client into a single stream, which it then stripes using an approach similar to a log-structured file system. This provides high performance for writes of small files as well as for reads and writes of large files. Zebra also writes parity information in each stripe in the style of RAID disk arrays; this increases storage costs slightly, but allows the system to continue operation while a single storage server is unavailable. A prototype implementation of Zebra, built in the Sprite operating system, provides 4--5 times the throughput of the standard Sprite file system or NFS for large files and a 15--300\% improvement for writing small files.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; measurement; performance; reliability", subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, File organization. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Allocation/deallocation strategies. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Access methods. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements.
{\bf E.5} Data, FILES, Organization/structure. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Secondary storage. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems.", } @Article{Amir:1995:TSR, author = "Y. Amir and L. E. Moser and P. M. Melliar-Smith and D. A. Agarwal and P. Ciarfella", title = "The {Totem} Single-Ring Ordering and Membership Protocol", journal = j-TOCS, volume = "13", number = "4", pages = "311--342", month = nov, year = "1995", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-4/p311-amir/", abstract = "Fault-tolerant distributed systems are becoming more important, but in existing systems, maintaining the consistency of replicated data is quite expensive. The Totem single-ring protocol supports consistent concurrent operations by placing a total order on broadcast messages. This total order is derived from the sequence number in a token that circulates around a logical ring imposed on a set of processors in a broadcast domain. The protocol handles reconfiguration of the system when processors fail and restart or when the network partitions and remerges. Extended virtual synchrony ensures that processors deliver messages and configuration changes to the application in a consistent, systemwide total order. An effective flow control mechanism enables the Totem single-ring protocol to achieve message-ordering rates significantly higher than the best prior total-ordering protocols.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "performance; reliability", subject = "{\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol architecture. 
{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Network communications. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems. {\bf C.2.5} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Local and Wide-Area Networks, Token rings. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Network communication. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems.", } @Article{Herlihy:1995:SCC, author = "Maurice Herlihy and Beng-Hong Lim and Nir Shavit", title = "Scalable Concurrent Counting", journal = j-TOCS, volume = "13", number = "4", pages = "343--364", month = nov, year = "1995", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-4/p343-herlihy/", abstract = "The notion of counting is central to a number of basic multiprocessor coordination problems, such as dynamic load balancing, barrier synchronization, and concurrent data structure design. We investigate the scalability of a variety of counting techniques for large-scale multiprocessors. We compare counting techniques based on: (1) spin locks, (2) message passing, (3) distributed queues, (4) software combining trees, and (5) counting networks. Our comparison is based on a series of simple benchmarks on a simulated 64-processor Alewife machine, a distributed-memory multiprocessor currently under development at MIT. Although locking techniques are known to perform well on small-scale, bus-based multiprocessors, serialization limits performance, and contention can degrade performance. 
Both counting networks and combining trees outperform the other methods substantially by avoiding serialization and alleviating contention, although combining-tree throughput is more sensitive to variations in load. A comparison of shared-memory and message-passing implementations of counting networks and combining trees shows that message-passing implementations have substantially higher throughput.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; experimentation; performance", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Multiprocessing/multiprogramming/multitasking. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Multiple-instruction-stream, multiple-data-stream processors (MIMD). {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance Analysis and Design Aids**, Simulation**. {\bf E.1} Data, DATA STRUCTURES, Lists, stacks, and queues. 
{\bf E.1} Data, DATA STRUCTURES, Trees.", } @Article{Mandrioli:1995:GTC, author = "Dino Mandrioli and Sandro Morasca and Angelo Morzenti", title = "Generating Test Cases for Real-Time Systems from Logic Specifications", journal = j-TOCS, volume = "13", number = "4", pages = "365--398", month = nov, year = "1995", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-4/p365-mandrioli/", abstract = "We address the problem of automated derivation of functional test cases for real-time systems, by introducing techniques for generating test cases from formal specifications written in TRIO, a language that extends classical temporal logic to deal explicitly with time measures. We describe an interactive tool that has been built to implement these techniques, based on interpretation algorithms of the TRIO language. Several heuristic criteria are suggested to reduce drastically the size of the test cases that are generated. Experience in the use of the tool on real-life cases is reported.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; verification", subject = "{\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing and Debugging, Testing tools (e.g., data generators, coverage testing). {\bf B.6.2} Hardware, LOGIC DESIGN, Reliability and Testing**, Test generation**. {\bf B.6.3} Hardware, LOGIC DESIGN, Design Aids, Verification. {\bf C.3} Computer Systems Organization, SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS, Real-time and embedded systems. {\bf D.2.1} Software, SOFTWARE ENGINEERING, Requirements/Specifications, Languages. {\bf D.2.1} Software, SOFTWARE ENGINEERING, Requirements/Specifications, Tools. 
{\bf B.6.3} Hardware, LOGIC DESIGN, Design Aids, Hardware description languages.", } @Article{Chen:1996:MPP, author = "J. Bradley Chen and Yasuhiro Endo and Kee Chan and David Mazi{\`e}res and Antonio Dias and Margo Seltzer and Michael D. Smith", title = "The Measured Performance of Personal Computer Operating Systems", journal = j-TOCS, volume = "14", number = "1", pages = "3--40", month = feb, year = "1996", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-1/p3-chen/", abstract = "This article presents a comparative study of the performance of three operating systems that run on the personal computer architecture derived from the IBM-PC. The operating systems, Windows for Workgroups, Windows NT, and NetBSD (a freely available variant of the UNIX operating system), cover a broad range of system functionality and user requirements, from a single-address-space model to full protection with preemptive multitasking. Our measurements are enabled by hardware counters in Intel's Pentium processor that permit measurement of a broad range of processor events including instruction counts and on-chip cache miss counts. We use both microbenchmarks, which expose specific differences between the systems, and application workloads, which provide an indication of expected end-to-end performance. Our microbenchmark results show that accessing system functionality is often more expensive in Windows for Workgroups than in the other two systems due to frequent changes in machine mode and the use of system call hooks.
When running native applications, Windows NT is more efficient than Windows, but it incurs overhead similar to that of a microkernel, since its application interface (the Win32 API) is implemented as a user-level server. Overall, system functionality can be accessed most efficiently in NetBSD; we attribute this to its monolithic structure and to the absence of the complications created by hardware backward-compatibility requirements in the other systems. Measurements of application performance show that although the impact of these differences is significant in terms of instruction counts and other hardware events (often a factor of 2 to 7 difference between the systems), overall performance is sometimes determined by the functionality provided by specific subsystems, such as the graphics subsystem or the file system buffer cache.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "measurement; performance", subject = "{\bf D.4.8} Software, OPERATING SYSTEMS, Performance. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS. {\bf D.4.0} Software, OPERATING SYSTEMS, General. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design.", } @Article{Anderson:1996:SNF, author = "Thomas E. Anderson and Michael D. Dahlin and Jeanna M. Neefe and David A. Patterson and Drew S. Roselli and Randolph Y. Wang", title = "Serverless Network File Systems", journal = j-TOCS, volume = "14", number = "1", pages = "41--79", month = feb, year = "1996", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-1/p41-anderson/", abstract = "We propose a new paradigm for network file system design: {\em serverless network file systems}. 
While traditional network file systems rely on a central server machine, a serverless system utilizes workstations cooperating as peers to provide all file system services. Any machine in the system can store, cache, or control any block of data. Our approach uses this location independence, in combination with fast local area networks, to provide better performance and scalability than traditional file systems. Furthermore, because any machine in the system can assume the responsibilities of a failed component, our serverless design also provides high availability via redundant data storage. To demonstrate our approach, we have implemented a prototype serverless network file system called xFS. Preliminary performance measurements suggest that our architecture achieves its goal of scalability. For instance, in a 32-node xFS system with 32 active clients, each client receives nearly as much read or write throughput as it would see if it were the only active client.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; measurement; performance; reliability", subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Access methods. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Allocation/deallocation strategies. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Checkpoint/restart. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf E.5} Data, FILES, Organization/structure. {\bf H.3.2} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Secondary storage. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Directory structures. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, File organization. 
{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Simulation. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems.", } @Article{Bressoud:1996:HBF, author = "Thomas C. Bressoud and Fred B. Schneider", title = "Hypervisor-Based Fault Tolerance", journal = j-TOCS, volume = "14", number = "1", pages = "80--107", month = feb, year = "1996", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-1/p80-bressoud/", abstract = "Protocols to implement a fault-tolerant computing system are described. These protocols augment the hypervisor of a virtual-machine manager and coordinate a primary virtual machine with its backup. No modifications to the hardware, operating system, or application programs are required. A prototype system was constructed for HP's PA-RISC instruction-set architecture. Even though the prototype was not carefully tuned, it ran programs about a factor of 2 slower than a bare machine would.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; reliability", subject = "{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems. 
{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Checkpoint/restart.", } @Article{Wilkes:1996:HAH, author = "John Wilkes and Richard Golding and Carl Staelin and Tim Sullivan", title = "The {HP AutoRAID} Hierarchical Storage System", journal = j-TOCS, volume = "14", number = "1", pages = "108--136", month = feb, year = "1996", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-1/p108-wilkes/", abstract = "Configuring redundant disk arrays is a black art. To configure an array properly, a system administrator must understand the details of both the array and the workload it will support. Incorrect understanding of either, or changes in the workload over time, can lead to poor performance. We present a solution to this problem: a two-level storage hierarchy implemented inside a single disk-array controller. In the upper level of this hierarchy, two copies of active data are stored to provide full redundancy and excellent performance. In the lower level, RAID 5 parity protection is used to provide excellent storage cost for inactive data, at somewhat lower performance. The technology we describe in this article, known as HP AutoRAID, automatically and transparently manages migration of data blocks between these two levels as access patterns change. The result is a fully redundant storage system that is extremely easy to use, is suitable for a wide variety of workloads, is largely insensitive to dynamic workload changes, and performs much better than disk arrays with comparable numbers of spindles and much larger amounts of front-end RAM cache. Because the implementation of the HP AutoRAID technology is almost entirely in software, the additional hardware cost for these benefits is very small.
We describe the HP AutoRAID technology in detail, provide performance data for an embodiment of it in a storage array, and summarize the results of simulation studies used to choose algorithms implemented in the array.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; performance; reliability", subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Secondary storage. {\bf B.4.2} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS, Input/Output Devices, Channels and controllers. {\bf B.4.5} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS, Reliability, Testing, and Fault-Tolerance**, Redundant design**. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Mass storage.", } @Article{Grimshaw:1996:PRT, author = "Andrew S. Grimshaw and Jon B. Weissman and W. Timothy Strayer", title = "Portable Run-Time Support for Dynamic Object-Oriented Parallel Processing", journal = j-TOCS, volume = "14", number = "2", pages = "139--170", month = may, year = "1996", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-2/p139-grimshaw/", abstract = "Mentat is an object-oriented parallel processing system designed to simplify the task of writing portable parallel programs for parallel machines and workstation networks. The Mentat compiler and run-time system work together to automatically manage the communication and synchronization between objects. The run-time system marshals member function arguments, schedules objects on processors, and dynamically constructs and executes large-grain data dependence graphs. In this article we present the Mentat run-time system. 
We focus on three aspects---the software architecture, including the interface to the compiler and the structure and interaction of the principal components of the run-time system; the run-time overhead on a component-by-component basis for two platforms, a Sun SPARCstation 2 and an Intel Paragon; and an analysis of the minimum granularity required for application programs to overcome the run-time overhead.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "languages; performance", subject = "{\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors, Run-time environments. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Parallel programming. {\bf D.1.5} Software, PROGRAMMING TECHNIQUES, Object-oriented Programming. {\bf D.3.2} Software, PROGRAMMING LANGUAGES, Language Classifications, Concurrent, distributed, and parallel languages. {\bf D.3.2} Software, PROGRAMMING LANGUAGES, Language Classifications, Object-oriented languages.", } @Article{Hardy:1996:CIE, author = "Darren R. Hardy and Michael F. Schwartz", title = "Customized Information Extraction as a Basis for Resource Discovery", journal = j-TOCS, volume = "14", number = "2", pages = "171--199", month = may, year = "1996", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-2/p171-hardy/", abstract = "Indexing file contents is a powerful means of helping users locate documents, software, and other types of data among large repositories. In environments that contain many different types of data, content indexing requires type-specific processing to extract information effectively.
We present a model for type-specific, user-customizable information extraction, and a system implementation called {\em Essence}. This software structure allows users to associate specialized extraction methods with ordinary files, providing the illusion of an object-oriented file system that encapsulates indexing methods within files. By exploiting the semantics of common file types, Essence generates compact yet representative file summaries that can be used to improve both browsing and indexing in resource discovery systems. Essence can extract information from most of the types of files found in common file systems, including files with nested structure (such as compressed ``tar'' files). Essence interoperates with a number of different search/index systems (such as WAIS and Glimpse), as part of the Harvest system.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; measurement", subject = "{\bf H.3.1} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Content Analysis and Indexing. {\bf H.3.4} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Systems and Software, Information networks. {\bf E.5} Data, FILES, Organization/structure. {\bf H.5.2} Information Systems, INFORMATION INTERFACES AND PRESENTATION, User Interfaces.", } @Article{Spasojevic:1996:ESW, author = "Mirjana Spasojevic and M. 
Satyanarayanan", title = "An Empirical Study of a Wide-Area Distributed File System", journal = j-TOCS, volume = "14", number = "2", pages = "200--222", month = may, year = "1996", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-2/p200-spasojevic/", abstract = "The evolution of the Andrew File System (AFS) into a wide-area distributed file system has encouraged collaboration and information dissemination on a much broader scale than ever before. We examine AFS as a provider of wide-area file services to over 100 organizations around the world. We discuss usage characteristics of AFS derived from empirical measurements of the system. Our observations indicate that AFS provides robust and efficient data access in its current configuration, thus confirming its viability as a design point for wide-area distributed file systems.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; measurement; performance", subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements.", } @Article{Corbett:1996:VPF, author = "Peter F. Corbett and Dror G. 
Feitelson", title = "The {Vesta} Parallel File System", journal = j-TOCS, volume = "14", number = "3", pages = "225--264", month = aug, year = "1996", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-3/p225-corbett/", abstract = "The Vesta parallel file system is designed to provide parallel file access to application programs running on multicomputers with parallel I/O subsystems. Vesta uses a new abstraction of files: a file is not a sequence of bytes, but rather it can be partitioned into multiple disjoint sequences that are accessed in parallel. The partitioning---which can also be changed dynamically---reduces the need for synchronization and coordination during the access. Some control over the layout of data is also provided, so the layout can be matched with the anticipated access patterns. The system is fully implemented and forms the basis for the AIX Parallel I/O File System on the IBM SP2. The implementation does not compromise scalability or parallelism. In fact, all data accesses are done directly to the I/O node that contains the requested data, without any indirection or access to shared metadata. Disk mapping and caching functions are confined to each I/O node, so there is no need to keep data coherent across nodes. Performance measurements show good scalability with increased resources. Moreover, different access patterns are shown to achieve similar performance.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; performance", subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Input/output.
{\bf E.5} Data, FILES, Organization/structure. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Parallel programming. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Parallel processors**.", } @Article{Cristian:1996:FTA, author = "Flaviu Cristian and Bob Dancey and Jon Dehn", title = "Fault-tolerance in Air Traffic Control Systems", journal = j-TOCS, volume = "14", number = "3", pages = "265--286", month = aug, year = "1996", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-3/p265-cristian/", abstract = "The distributed real-time system services developed by Lockheed Martin's Air Traffic Management group serve as the infrastructure for a number of air traffic control systems. Either completed development or under development are the US Federal Aviation Administration's Display System Replacement (DSR) system, the UK Civil Aviation Authority's New Enroute Center (NERC) system, and the Republic of China's Air Traffic Control Automated System (ATCAS). These systems are intended to replace present en route systems over the next decade. High availability of air traffic control services is an essential requirement of these systems. This article discusses the general approach to fault-tolerance adopted in this infrastructure, by reviewing some of the questions which were asked during the system design, various alternative solutions considered, and the reasons for the design choices made.
The aspects of this infrastructure chosen for the individual ATC systems mentioned above, along with the status of those systems, are presented in the Section 11 of the article.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; reliability", subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Reliability, availability, and serviceability. {\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing and Debugging, Error handling and recovery. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf J.7} Computer Applications, COMPUTERS IN OTHER SYSTEMS, Real time. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Real-time systems and embedded systems.", } @Article{Devarakonda:1996:RCF, author = "Murthy Devarakonda and Bill Kish and Ajay Mohindra", title = "Recovery in the {Calypso} File System", journal = j-TOCS, volume = "14", number = "3", pages = "287--310", month = aug, year = "1996", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-3/p287-devarakonda/", abstract = "This article presents the design and implementation of the recovery scheme in Calypso. Calypso is a cluster-optimized, distributed file system for UNIX clusters. As in Sprite and AFS, Calypso servers are stateful and scale well to a large number of clients. The recovery scheme in Calypso is nondisruptive, meaning that open files remain open, client modified data are saved, and in-flight operations are properly handled across server recovery. The scheme uses distributed state among the clients to reconstruct the server state on a backup node if disks are multiported or on the rebooted server node.
It guarantees data consistency during recovery and provides congestion control. Measurements show that the state reconstruction can be quite fast: for example, in a 32-node cluster, when an average node contains state for about 420 files, the reconstruction time is about 3.3 seconds. However, the time to update a file system after a failure can be a major factor in the overall recovery time, even when using journaling techniques.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; management; measurement; reliability", subject = "{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf E.5} Data, FILES, Backup/recovery.", } @Article{Cao:1996:IPI, author = "Pei Cao and Edward W. Felten and Anna R. Karlin and Kai Li", title = "Implementation and Performance of Integrated Application-Controlled File Caching, Prefetching, and Disk Scheduling", journal = j-TOCS, volume = "14", number = "4", pages = "311--343", month = nov, year = "1996", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-4/p311-cao/", abstract = "As the performance gap between disks and microprocessors continues to increase, effective utilization of the file cache becomes increasingly important. Application-controlled file caching and prefetching can apply application-specific knowledge to improve file cache management. 
However, supporting application-controlled file caching and prefetching is nontrivial because caching and prefetching need to be integrated carefully, and the kernel needs to allocate cache blocks among processes appropriately. This article presents the design, implementation, and performance of a file system that integrates application-controlled caching, prefetching, and disk scheduling. We use a two-level cache management strategy. The kernel uses the LRU-SP (Least-Recently-Used with Swapping and Placeholders) policy to allocate blocks to processes, and each process integrates application-specific caching and prefetching based on the {\em controlled-aggressive\/} policy, an algorithm previously shown in a theoretical sense to be nearly optimal. Each process also improves its disk access latency by submitting its prefetches in batches so that the requests can be scheduled to optimize disk access performance. Our measurements show that this combination of techniques greatly improves the performance of the file system. We measured that the running time is reduced by 3\% to 49\% (average 26\%) for single-process workloads and by 5\% to 76\% (average 32\%) for multiprocess workloads.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; experimentation; measurement; performance", subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Secondary storage. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Design studies. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Storage hierarchies. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Access methods. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Modeling and prediction. {\bf E.5} Data, FILES, Optimization**.", } @Article{Saavedra:1996:ABC, author = "Rafael H. Saavedra and Alan J. 
Smith", title = "Analysis of Benchmark Characteristics and Benchmark Performance Prediction", journal = j-TOCS, volume = "14", number = "4", pages = "344--384", month = nov, year = "1996", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-4/p344-saavedra/", abstract = "Standard benchmarking provides the run-times for given programs on given machines, but fails to provide insight as to why those results were obtained (either in terms of machine or program characteristics) and fails to provide run-times for that program on some other machine, or some other programs on that machine. We have developed a machine-independent model of program execution to characterize both machine performance and program execution. By merging these machine and program characterizations, we can estimate execution time for arbitrary machine/program combinations. Our technique allows us to identify those operations, either on the machine or in the programs, which dominate the benchmark results. This information helps designers in improving the performance of future machines and users in tuning their applications to better utilize the performance of existing machines. Here we apply our methodology to characterize benchmarks and predict their execution times. We present extensive run-time statistics for a large set of benchmarks including the SPEC and Perfect Club suites. We show how these statistics can be used to identify important shortcomings in the programs. In addition, we give execution time estimates for a large sample of programs and machines and compare these against benchmark results.
Finally, we develop a metric for program similarity that makes it possible to classify benchmarks with respect to a large set of characteristics.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "measurement; performance", subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Measurement techniques. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Modeling techniques. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Performance attributes. {\bf D.2.8} Software, SOFTWARE ENGINEERING, Metrics, Performance measures. {\bf I.6.4} Computing Methodologies, SIMULATION AND MODELING, Model Validation and Analysis.", } @Article{Shavit:1996:DT, author = "Nir Shavit and Asaph Zemach", title = "Diffracting Trees", journal = j-TOCS, volume = "14", number = "4", pages = "385--428", month = nov, year = "1996", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-4/p385-shavit/", abstract = "Shared counters are among the most basic coordination structures in multiprocessor computation, with applications ranging from barrier synchronization to concurrent-data-structure design. This article introduces diffracting trees, novel data structures for shared counting and load balancing in a distributed/parallel environment. Empirical evidence, collected on a simulated distributed shared-memory machine and several simulated message-passing architectures, shows that diffracting trees scale better and are more robust than both combining trees and counting networks, currently the most effective known methods for implementing concurrent counters in software.
The use of a randomized coordination method together with a combinatorial data structure overcomes the resiliency drawbacks of combining trees. Our simulations show that to handle the same load, diffracting trees and counting networks should have a similar width {\em w}, yet the depth of a diffracting tree is {\em O\/}(log {\em w\/}), whereas counting networks have depth {\em O\/}(log$^2$ {\em w\/}). Diffracting trees have already been used to implement highly efficient producer/consumer queues, and we believe diffraction will prove to be an effective alternative paradigm to combining and queue-locking in the design of many concurrent data structures.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; performance", subject = "{\bf E.1} Data, DATA STRUCTURES. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors). {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Synchronization. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems.", } @Article{Anonymous:1996:AI, author = "Anonymous", title = "Author Index", journal = j-TOCS, volume = "14", number = "4", pages = "429--430", month = nov, year = "1996", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-4/p429-author_index/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", subject = "{\bf A.2} General Literature, REFERENCE.", } @Article{Birman:1997:EEP, author = "Kenneth P.
Birman", title = "Editorial: Electronic Publication of {TOCS}", journal = j-TOCS, volume = "15", number = "1", pages = "1--1", month = feb, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-1/p1-birman/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Kontothanassis:1997:SCS, author = "Leonidas I. Kontothanassis and Robert W. Wisniewski and Michael L. Scott", title = "Scheduler-Conscious Synchronization", journal = j-TOCS, volume = "15", number = "1", pages = "3--40", month = feb, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-1/p3-kontothanassis/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; performance; reliability", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Synchronization. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Parallel programming. 
{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Multiprocessing/multiprogramming/multitasking.", } @Article{Kotz:1997:DDM, author = "David Kotz", title = "Disk-Directed {I/O} for {MIMD} Multiprocessors", journal = j-TOCS, volume = "15", number = "1", pages = "41--74", month = feb, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-1/p41-kotz/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; performance", subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Access methods. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, File organization. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Simulation. {\bf E.5} Data, FILES.", } @Article{Steenkiste:1997:HSN, author = "Peter Steenkiste", title = "A High-Speed Network Interface for Distributed-Memory Systems: Architecture and Applications", journal = j-TOCS, volume = "15", number = "1", pages = "75--109", month = feb, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-1/p75-steenkiste/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "management; performance; reliability", subject = "{\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Network communication. {\bf B.4.3} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS, Interconnections (Subsystems), Interfaces. 
{\bf C.0} Computer Systems Organization, GENERAL, System architectures. {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol architecture. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Buffering.", } @Article{Anderson:1997:DRA, author = "David P. Anderson", title = "Device Reservation in Audio\slash Video Editing Systems", journal = j-TOCS, volume = "15", number = "2", pages = "111--133", month = may, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-2/p111-anderson/", abstract = "What fraction of disks and other shared devices must be reserved to play an audio/video document without dropouts? In general, this question cannot be answered precisely. For documents with complex and irregular structure, such as those arising in audio/video editing, it is difficult even to give a good estimate. We describe three approaches to this problem. The first, based on long-term average properties of segments, is fast but imprecise: it underreserves in some cases and overreserves in others. The second approach models individual disk and network operations. It is precise but slow. The third approach, a hybrid, is both precise and fast.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; performance", subject = "{\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Real-time systems and embedded systems. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Modeling and prediction. {\bf H.5.1} Information Systems, INFORMATION INTERFACES AND PRESENTATION, Multimedia Information Systems, Audio input/output. 
{\bf H.5.1} Information Systems, INFORMATION INTERFACES AND PRESENTATION, Multimedia Information Systems, Video (e.g., tape, disk, DVI).", } @Article{Anderson:1997:RTC, author = "James H. Anderson and Srikanth Ramamurthy and Kevin Jeffay", title = "Real-time Computing with Lock-Free Shared Objects", journal = j-TOCS, volume = "15", number = "2", pages = "134--165", month = may, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-2/p134-anderson/", abstract = "This article considers the use of lock-free shared objects within hard real-time systems. As the name suggests, {\em lock-free\/} shared objects are distinguished by the fact that they are accessed without locking. As such, they do not give rise to priority inversions, a key advantage over conventional, lock-based object-sharing approaches. Despite this advantage, it is not immediately apparent that lock-free shared objects can be employed if tasks must adhere to strict timing constraints. In particular, lock-free object implementations permit concurrent operations to interfere with each other, and repeated interferences can cause a given operation to take an arbitrarily long time to complete. The main contribution of this article is to show that such interferences can be bounded by judicious scheduling. This work pertains to periodic, hard real-time tasks that share lock-free objects on a uniprocessor. In the first part of the article, scheduling conditions are derived for such tasks, for both static and dynamic priority schemes. Based on these conditions, it is formally shown that lock-free shared objects often incur less overhead than object implementations based on wait-free algorithms or lock-based schemes. 
In the last part of the article, this conclusion is validated experimentally through work involving a real-time desktop videoconferencing system.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; performance; theory", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf C.3} Computer Systems Organization, SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS, Real-time and embedded systems. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Multiprocessing/multiprogramming/multitasking. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Mutual exclusion. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Synchronization. {\bf J.7} Computer Applications, COMPUTERS IN OTHER SYSTEMS, Real time.", } @Article{Mahmood:1997:OAM, author = "Ausif Mahmood and Donald J. Lynch and Roger B. Shaffer", title = "Optimally Adaptive, Minimum-Distance, Circuit-Switched Routing in Hypercubes", journal = j-TOCS, volume = "15", number = "2", pages = "166--193", month = may, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-2/p166-mahmood/", abstract = "In circuit-switched routing, the path between a source and its destination is established by incrementally reserving all required links before the data transmission can begin. If the routing algorithm is not carefully designed, deadlocks can occur in reserving these links. Deadlock-free algorithms based on dimension-ordered routing, such as the {\em E-cube}, exist. 
However, {\em E-cube\/} does not provide any flexibility in choosing a path from a source to its destination and can thus result in long latencies under heavy or uneven traffic. Adaptive, minimum-distance routing algorithms, such as the {\em Turn Model\/} and the {\em UP Preference\/} algorithms, have previously been reported. In this article, we present a new class of adaptive, provably deadlock-free, minimum-distance routing algorithms. We prove that the algorithms developed here are optimally adaptive in the sense that any further flexibility in communication will result in deadlock. We show that the {\em Turn Model\/} is actually a member of our new class of algorithms that does not perform as well as other algorithms within the new class. It creates artificial hotspots in routing the traffic and allows fewer total paths. We present an analytical comparison of the flexibility and balance in routing provided by various algorithms and a comparison based on uniform and nonuniform traffic simulations. The {\em Extended UP Preference\/} algorithm developed in this article is shown to have improved performance with respect to existing algorithms. The methodology and the algorithms developed here can be used to develop routing for other schemes such as wormhole routing, and for other recursively defined networks such as {\em k\/}-ary {\em n\/}-cubes.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; theory", subject = "{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Network communications. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Interconnection architectures. 
{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Network topology.", } @Article{Pfitzmann:1997:SLT, author = "Birgit Pfitzmann and Michael Waidner", title = "Strong Loss Tolerance of Electronic Coin Systems", journal = j-TOCS, volume = "15", number = "2", pages = "194--213", month = may, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-2/p194-pfitzmann/", abstract = "Untraceable electronic cash means prepaid digital payment systems, usually with offline payments, that protect user privacy. Such systems have recently been given considerable attention by both theory and development projects. However, in most current schemes, loss of a user device containing electronic cash implies a loss of money, just as with real cash. In comparison with credit schemes, this is considered a serious shortcoming. This article shows how untraceable electronic cash can be made loss tolerant, i.e., how the monetary value of the lost data can be recovered. Security against fraud and preservation of privacy are ensured; strong loss tolerance means that not even denial of recovery is possible. In particular, systems based on electronic coins are treated. We present general design principles and options and their instantiation in one concrete payment system. The measures are practical.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; reliability; security", subject = "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Cryptographic controls. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. 
{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf H.4.3} Information Systems, INFORMATION SYSTEMS APPLICATIONS, Communications Applications. {\bf K.6.5} Computing Milieux, MANAGEMENT OF COMPUTING AND INFORMATION SYSTEMS, Security and Protection. {\bf K.4.0} Computing Milieux, COMPUTERS AND SOCIETY, General.", } @Article{Mogul:1997:ERL, author = "Jeffrey C. Mogul and K. K. Ramakrishnan", title = "Eliminating Receive Livelock in an Interrupt-Driven Kernel", journal = j-TOCS, volume = "15", number = "3", pages = "217--252", month = aug, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-3/p217-mogul/", abstract = "Most operating systems use interface interrupts to schedule network tasks. Interrupt-driven systems can provide low overhead and good latency at low offered load, but degrade significantly at higher arrival rates unless care is taken to prevent several pathologies. These are various forms of {\bf receive livelock}, in which the system spends all of its time processing interrupts, to the exclusion of other necessary tasks. Under extreme conditions, no packets are delivered to the user application or the output of the system. To avoid livelock and related problems, an operating system must schedule network interrupt handling as carefully as it schedules process execution. We modified an interrupt-driven networking implementation to do so; this modification eliminates receive livelock without degrading other aspects of system performance. Our modifications include the use of polling when the system is heavily loaded, while retaining the use of interrupts under lighter load.
We present measurements demonstrating the success of our approach.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "performance", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Input/output. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Network communication. {\bf C.2.0} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, General.", } @Article{Harchol-Balter:1997:EPL, author = "Mor Harchol-Balter and Allen B. Downey", title = "Exploiting Process Lifetime Distributions for Dynamic Load Balancing", journal = j-TOCS, volume = "15", number = "3", pages = "253--285", month = aug, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-3/p253-harchol-balter/", abstract = "We consider policies for CPU load balancing in networks of workstations. We address the question of whether preemptive migration (migrating active processes) is necessary, or whether remote execution (migrating processes only at the time of birth) is sufficient for load balancing. We show that resolving this issue is strongly tied to understanding the process lifetime distribution. Our measurements indicate that the distribution of lifetimes for a UNIX process is Pareto (heavy-tailed), with a consistent functional form over a variety of workloads. We show how to apply this distribution to derive a preemptive migration policy that requires no hand-tuned parameters. 
We used a trace-driven simulation to show that our preemptive migration strategy is far more effective than remote execution, even when the memory transfer cost is high.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; measurement; performance", subject = "{\bf C.2.3} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Operations, Network management. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS. {\bf C.5.3} Computer Systems Organization, COMPUTER SYSTEM IMPLEMENTATION, Microcomputers. {\bf G.3} Mathematics of Computing, PROBABILITY AND STATISTICS. {\bf G.m} Mathematics of Computing, MISCELLANEOUS. {\bf I.6.0} Computing Methodologies, SIMULATION AND MODELING, General. {\bf C.2.3} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Operations, Network monitoring.", } @Article{Krieger:1997:HPO, author = "Orran Krieger and Michael Stumm", title = "{HFS}: a Performance-Oriented Flexible File System Based on Building-Block Compositions", journal = j-TOCS, volume = "15", number = "3", pages = "286--321", month = aug, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-3/p286-krieger/", abstract = "The Hurricane File System (HFS) is designed for (potentially large-scale) shared-memory multiprocessors. Its architecture is based on the principle that, in order to maximize performance for applications with diverse requirements, a file system must support a wide variety of file structures, file system policies, and I/O interfaces. 
Files in HFS are implemented using simple building blocks composed in potentially complex ways. This approach yields great flexibility, allowing an application to customize the structure and policies of a file to exactly meet its requirements. As an extreme example, HFS allows a file's structure to be optimized for concurrent random-access write-only operations by 10 threads, something no other file system can do. Similarly, the prefetching, locking, and file cache management policies can all be chosen to match an application's access pattern. In contrast, most parallel file systems support a single file structure and a small set of policies. We have implemented HFS as part of the Hurricane operating system running on the Hector shared-memory multiprocessor. We demonstrate that the flexibility of HFS comes with little processing or I/O overhead. We also show that for a number of file access patterns, HFS is able to deliver to the applications the full I/O bandwidth of the disks on our system.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; performance", subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, File organization. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Access methods. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf E.5} Data, FILES, Optimization**. {\bf E.5} Data, FILES, Organization/structure.", } @Article{Lo:1997:CTL, author = "Jack L. Lo and Joel S. Emer and Henry M. Levy and Rebecca L. Stamm and Dean M. 
Tullsen", title = "Converting Thread-Level Parallelism to Instruction-Level Parallelism via Simultaneous Multithreading", journal = j-TOCS, volume = "15", number = "3", pages = "322--354", month = aug, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-3/p322-lo/", abstract = "To achieve high performance, contemporary computer systems rely on two forms of parallelism: instruction-level parallelism (ILP) and thread-level parallelism (TLP). Wide-issue super-scalar processors exploit ILP by executing multiple instructions from a single program in a single cycle. Multiprocessors (MP) exploit TLP by executing different threads in parallel on different processors. Unfortunately, both parallel processing styles statically partition processor resources, thus preventing them from adapting to dynamically changing levels of ILP and TLP in a program. With insufficient TLP, processors in an MP will be idle; with insufficient ILP, multiple-issue hardware on a superscalar is wasted. This article explores parallel processing on an alternative architecture, simultaneous multithreading (SMT), which allows multiple threads to compete for and share all of the processor's resources every cycle. The most compelling reason for running parallel applications on an SMT processor is its ability to use thread-level parallelism and instruction-level parallelism interchangeably. By permitting multiple threads to share the processor's functional units simultaneously, the processor can use both ILP and TLP to accommodate variations in parallelism.
When a program has only a single thread, all of the SMT processor's resources can be dedicated to that thread; when more TLP exists, this parallelism can compensate for a lack of per-thread ILP. We examine two alternative on-chip parallel architectures for the next generation of processors. We compare SMT and small-scale, on-chip multiprocessors in their ability to exploit both ILP and TLP. First, we identify the hardware bottlenecks that prevent multiprocessors from effectively exploiting ILP. Then, we show that because of its dynamic resource sharing, SMT avoids these inefficiencies and benefits from being able to run more threads on a single processor. The use of TLP is especially advantageous when per-thread ILP is limited. The ease of adding additional thread contexts on an SMT (relative to adding additional processors on an MP) allows simultaneous multithreading to expose more parallelism, further increasing functional unit utilization and attaining a 52\% average speedup (versus a four-processor, single-chip multiprocessor with comparable execution resources). This study also addresses an often-cited concern regarding the use of thread-level parallelism or multithreading: interference in the memory system and branch prediction hardware. We find the multiple threads cause interthread interference in the caches and place greater demands on the memory system, thus increasing average memory latencies. By exploiting threading-level parallelism, however, SMT hides these additional latencies, so that they only have a small impact on total program performance. We also find that for parallel applications, the additional threads have minimal effects on branch prediction.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "measurement; performance", subject = "{\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Parallel processors**. 
{\bf C.0} Computer Systems Organization, GENERAL, Instruction set design. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management.", } @Article{Levy:1997:GE, author = "Henry M. Levy", title = "Guest Editorial", journal = j-TOCS, volume = "15", number = "4", pages = "355--356", month = nov, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-4/p355-levy/", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Anderson:1997:CPW, author = "Jennifer M. Anderson and Lance M. Berc and Jeffrey Dean and Sanjay Ghemawat and Monika R. Henzinger and Shun-Tak A. Leung and Richard L. Sites and Mark T. Vandevoorde and Carl A. Waldspurger and William E. Weihl", title = "Continuous Profiling: Where Have All the Cycles Gone?", journal = j-TOCS, volume = "15", number = "4", pages = "357--390", month = nov, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-4/p357-anderson/", abstract = "This article describes the Digital Continuous Profiling Infrastructure, a sampling-based profiling system designed to run continuously on production systems. The system supports multiprocessors, works on unmodified executables, and collects profiles for entire systems, including user programs, shared libraries, and the operating system kernel. Samples are collected at a high rate (over 5200 samples/sec. per 333MHz processor), yet with low overhead (1-3\% slowdown for most workloads). 
Analysis tools supplied with the profiling system use the sample data to produce a precise and accurate accounting, down to the level of pipeline stalls incurred by individual instructions, of where time is being spent. When instructions incur stalls, the tools identify possible reasons, such as cache misses, branch mispredictions, and functional unit contention. The fine-grained instruction-level analysis guides users and automated optimizers to the causes of performance problems and provides important insights for fixing them.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "performance", subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Performance attributes. {\bf D.2.2} Software, SOFTWARE ENGINEERING, Design Tools and Techniques. {\bf D.2.6} Software, SOFTWARE ENGINEERING, Programming Environments. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance. {\bf D.4.0} Software, OPERATING SYSTEMS, General.", } @Article{Savage:1997:EDD, author = "Stefan Savage and Michael Burrows and Greg Nelson and Patrick Sobalvarro and Thomas Anderson", title = "{Eraser}: a Dynamic Data Race Detector for Multithreaded Programs", journal = j-TOCS, volume = "15", number = "4", pages = "391--411", month = nov, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-4/p391-savage/", abstract = "Multithreaded programming is difficult and error prone. It is easy to make a mistake in synchronization that produces a data race, yet it can be extremely hard to locate this mistake during debugging.
This article describes a new tool, called Eraser, for dynamically detecting data races in lock-based multithreaded programs. Eraser uses binary rewriting techniques to monitor every shared-memory reference and verify that consistent locking behavior is observed. We present several case studies, including undergraduate coursework and a multithreaded Web search engine, that demonstrate the effectiveness of this approach.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; experimentation; reliability", subject = "{\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing and Debugging, Monitors. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Parallel programming. {\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing and Debugging, Debugging aids. {\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing and Debugging, Tracing. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Deadlocks. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Multiprocessing/multiprogramming/multitasking.
{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Mutual exclusion.", } @Article{Bugnion:1997:DRC, author = "Edouard Bugnion and Scott Devine and Kinshuk Govil and Mendel Rosenblum", title = "{Disco}: Running Commodity Operating Systems on Scalable Multiprocessors", journal = j-TOCS, volume = "15", number = "4", pages = "412--447", month = nov, year = "1997", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-4/p412-bugnion/", abstract = "In this article we examine the problem of extending modern operating systems to run efficiently on large-scale shared-memory multiprocessors without a large implementation effort. Our approach brings back an idea popular in the 1970s: virtual machine monitors. We use virtual machines to run multiple commodity operating systems on a scalable multiprocessor. This solution addresses many of the challenges facing the system software for these machines. We demonstrate our approach with a prototype called Disco that runs multiple copies of Silicon Graphics' IRIX operating system on a multiprocessor. Our experience shows that the overheads of the monitor are small and that the approach provides scalability as well as the ability to deal with the nonuniform memory access time of these systems. To reduce the memory overheads associated with running multiple operating systems, virtual machines transparently share major data structures such as the program code and the file system buffer cache. We use the distributed-system support of modern operating systems to export a partial single system image to the users. 
The overall solution achieves most of the benefits of operating systems customized for scalable multiprocessors, yet it can be achieved with a significantly smaller implementation effort.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; performance", subject = "{\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Parallel processors**.", } @Article{Bal:1998:PEO, author = "Henri E. Bal and Raoul Bhoedjang and Rutger Hofman and Ceriel Jacobs and Koen Langendoen and Tim R{\"u}hl and M. Frans Kaashoek", title = "Performance Evaluation of the {Orca} Shared-Object System", journal = j-TOCS, volume = "16", number = "1", pages = "1--40", month = feb, year = "1998", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-1/p1-bal/", abstract = "Orca is a portable, object-based distributed shared memory (DSM) system. This article studies and evaluates the design choices made in the Orca system and compares Orca with other DSMs. The article gives a quantitative analysis of Orca's coherence protocol (based on write-updates with function shipping), the totally ordered group communication protocol, the strategy for object placement, and the all-software, user-space architecture. Performance measurements for 10 parallel applications illustrate the trade-offs made in the design of Orca and show that essentially the right design decisions have been made. 
A write-update protocol with function shipping is effective for Orca, especially since it is used in combination with techniques that avoid replicating objects that have a low read/write ratio. The overhead of totally ordered group communication on application performance is low. The Orca system is able to make near-optimal decisions for object placement and replication. In addition, the article compares the performance of Orca with that of a page-based DSM (TreadMarks) and another object-based DSM (CRL). It also analyzes the communication overhead of the DSMs for several applications. All performance measurements are done on a 32-node Pentium Pro cluster with Myrinet and Fast Ethernet networks. The results show that Orca programs send fewer messages and less data than the TreadMarks and CRL programs and obtain better speedups.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; languages; performance", subject = "{\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors, Run-time environments. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Distributed programming. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Parallel programming. {\bf D.3.2} Software, PROGRAMMING LANGUAGES, Language Classifications, Concurrent, distributed, and parallel languages. {\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors, Compilers.", } @Article{Derk:1998:RFT, author = "M. D. Derk and L. S. 
DeBrunner", title = "Reconfiguration for Fault Tolerance Using Graph Grammars", journal = j-TOCS, volume = "16", number = "1", pages = "41--54", month = feb, year = "1998", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-1/p41-derk/", abstract = "Reconfiguration for fault tolerance is a widely studied field, but this work applies graph grammars to this discipline for the first time. Reconfiguration Graph Grammars (RGG) are defined and applied to the definition of processor array reconfiguration algorithms. The nodes of a graph are associated with the processors of a processor array, and the edges are associated with those interprocessor communication lines that are active. The resulting algorithms for dynamic (run-time) reconfiguration are efficient and can be implemented distributively.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; design; reliability; theory", subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Reliability, availability, and serviceability. {\bf F.4.2} Theory of Computation, MATHEMATICAL LOGIC AND FORMAL LANGUAGES, Grammars and Other Rewriting Systems. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Multiple-instruction-stream, multiple-data-stream processors (MIMD).", } @Article{Mowry:1998:TLM, author = "Todd C. 
Mowry", title = "Tolerating Latency in Multiprocessors through Compiler-Inserted Prefetching", journal = j-TOCS, volume = "16", number = "1", pages = "55--92", month = feb, year = "1998", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-1/p55-mowry/", abstract = "The large latency of memory accesses in large-scale shared-memory multiprocessors is a key obstacle to achieving high processor utilization. {\em Software-controlled prefetching\/} is a technique for tolerating memory latency by explicitly executing instructions to move data close to the processor before the data are actually needed. To minimize the burden on the programmer, compiler support is needed to automatically insert prefetch instructions into the code. A key challenge when inserting prefetches is ensuring that the overheads of prefetching do not outweigh the benefits. While previous studies have demonstrated the effectiveness of hand-inserted prefetching in multiprocessor applications, the benefit of {\em compiler-inserted\/} prefetching in practice has remained an open question. This article proposes and evaluates a new compiler algorithm for inserting prefetches into multiprocessor code. The proposed algorithm attempts to minimize overheads by only issuing prefetches for references that are predicted to suffer cache misses. The algorithm can prefetch both dense-matrix and sparse-matrix codes, thus covering a large fraction of scientific applications. We have implemented our algorithm in the SUIF (Stanford University Intermediate Format) optimizing compiler.
The results of our detailed architectural simulations demonstrate that compiler-inserted prefetching can improve the speed of some parallel applications by as much as a factor of two.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; performance", subject = "{\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors, Optimization. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories. {\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors, Compilers.", } @Article{Agarwal:1998:TMR, author = "D. A. Agarwal and L. E. Moser and P. M. Melliar-Smith and R. K. Budhia", title = "The {Totem} Multiple-Ring Ordering and Topology Maintenance Protocol", journal = j-TOCS, volume = "16", number = "2", pages = "93--132", month = may, year = "1998", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-2/p93-agarwal/", abstract = "The Totem multiple-ring protocol provides reliable totally ordered delivery of messages across multiple local-area networks interconnected by gateways. This consistent message order is maintained in the presence of network partitioning and remerging, and of processor failure and recovery. The protocol provides accurate topology change information as part of the global total order of messages. It addresses the issue of scalability and achieves a latency that increases logarithmically with system size by exploiting process group locality and selective forwarding of messages through the gateways. Pseudocode for the protocol and an evaluation of its performance are given. 
---Authors' Abstract", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "algorithms; performance; reliability", subject = "{\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol architecture. {\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Network communications. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Fault tolerance.", } @Article{Lamport:1998:PTP, author = "Leslie Lamport", title = "The Part-Time Parliament", journal = j-TOCS, volume = "16", number = "2", pages = "133--169", month = may, year = "1998", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-2/p133-lamport/", abstract = "Recent archaeological discoveries on the island of Paxos reveal that the parliament functioned despite the peripatetic propensity of its part-time legislators. The legislators maintained consistent copies of the parliamentary record, despite their frequent forays from the chamber and the forgetfulness of their messengers. The Paxon parliament's protocol provides a new way of implementing the state machine approach to the design of distributed systems.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; reliability", subject = "{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. 
{\bf J.1} Computer Applications, ADMINISTRATIVE DATA PROCESSING, Government.", } @Article{Horowitz:1998:IMO, author = "Mark Horowitz and Margaret Martonosi and Todd C. Mowry and Michael D. Smith", title = "Informing Memory Operations: Memory Performance Feedback Mechanisms and Their Applications", journal = j-TOCS, volume = "16", number = "2", pages = "170--205", month = may, year = "1998", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-2/p170-horowitz/", abstract = "Memory latency is an important bottleneck in system performance that cannot be adequately solved by hardware alone. Several promising software techniques have been shown to address this problem successfully in specific situations. However, the generality of these software approaches has been limited because current architectures do not provide a fine-grained, low-overhead mechanism for observing and reacting to memory behavior directly. To fill this need, this article proposes a new class of memory operations called {\em informing memory operations}, which essentially consist of a memory operation combined (either implicitly or explicitly) with a conditional branch-and-link operation that is taken only if the reference suffers a cache miss. This article describes two different implementations of informing memory operations. One is based on a {\em cache-outcome condition code}, and the other is based on {\em low-overhead traps.\/} We find that modern in-order-issue and out-of-order-issue superscalar processors already contain the bulk of the necessary hardware support.
We describe how a number of software-based memory optimizations can exploit informing memory operations to enhance performance, and we look at cache coherence with fine-grained access control as a case study. Our performance results demonstrate that the runtime overhead of invoking the informing mechanism on the Alpha 21164 and MIPS R10000 processors is generally small enough to provide considerable flexibility to hardware and software designers, and that the cache coherence application has improved performance compared to other current solutions. We believe that the inclusion of informing memory operations in future processors may spur even more innovative performance optimizations.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; performance", subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Cache memories. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Measurement techniques. {\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors, Compilers. {\bf B.8.2} Hardware, PERFORMANCE AND RELIABILITY, Performance Analysis and Design Aids.", } @Article{Alexandrov:1998:UPG, author = "Albert D. Alexandrov and Maximilian Ibel and Klaus E. Schauser and Chris J. Scheiman", title = "{Ufo}: a Personal Global File System Based on User-Level Extensions to the Operating System", journal = j-TOCS, volume = "16", number = "3", pages = "207--233", month = aug, year = "1998", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-3/p207-alexandrov/", abstract = "In this article we show how to extend a wide range of functionality of standard operating systems completely at the user level.
Our approach works by intercepting selected system calls at the user level, using tracing facilities such as the /proc file system provided by many Unix operating systems. The behavior of some intercepted system calls is then modified to implement new functionality. This approach does not require any relinking or recompilation of existing applications. In fact, the extensions can even be dynamically ``installed'' into already running processes. The extensions work completely at the user level and install without system administrator assistance. Individual users can choose what extensions to run, in effect creating a personalized operating system view for themselves. We used this approach to implement a global file system, called Ufo, which allows users to treat remote files exactly as if they were local. Currently, Ufo supports file access through the FTP and HTTP protocols and allows new protocols to be plugged in. While several other projects have implemented global file system abstractions, they all require either changes to the operating system or modifications to standard libraries. The article gives a detailed performance analysis of our approach to extending the OS and establishes that Ufo introduces acceptable overhead for common applications even though intercepting individual system calls incurs a high cost.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "performance", subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management. {\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. 
{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems Management, Access methods.", } @Article{Gabbay:1998:UVP, author = "Freddy Gabbay and Avi Mendelson", title = "Using Value Prediction to Increase the Power of Speculative Execution Hardware", journal = j-TOCS, volume = "16", number = "3", pages = "234--270", month = aug, year = "1998", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-3/p234-gabbay/", abstract = "This article presents an experimental and analytical study of value prediction and its impact on speculative execution in superscalar microprocessors. Value prediction is a new paradigm that suggests predicting outcome values of operations (at run-time) and using these predicted values to trigger the execution of true-data-dependent operations speculatively. As a result, stalls to memory locations can be reduced and the amount of instruction-level parallelism can be extended beyond the limits of the program's dataflow graph. This article examines the characteristics of the value prediction concept from two perspectives: (1) the related phenomena that are reflected in the nature of computer programs and (2) the significance of these phenomena to boosting instruction-level parallelism of superscalar microprocessors that support speculative execution. In order to better understand these characteristics, our work combines both analytical and experimental studies.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; measurement; performance", subject = "{\bf C.0} Computer Systems Organization, GENERAL, System architectures.
{\bf C.1.1} Computer Systems Organization, PROCESSOR ARCHITECTURES, Single Data Stream Architectures, RISC. {\bf C.5.3} Computer Systems Organization, COMPUTER SYSTEM IMPLEMENTATION, Microcomputers, Microprocessors. {\bf C.0} Computer Systems Organization, GENERAL, Instruction set design.", } @Article{Juurlink:1998:QCP, author = "Ben H. H. Juurlink and Harry A. G. Wijshoff", title = "A Quantitative Comparison of Parallel Computation Models", journal = j-TOCS, volume = "16", number = "3", pages = "271--318", month = aug, year = "1998", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 13 18:36:53 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-3/p271-juurlink/", abstract = "In recent years, a large number of parallel computation models have been proposed to replace the PRAM as the parallel computation model presented to the algorithm designer. Although mostly the theoretical justifications for these models are sound, and many algorithmic results were obtained through these models, little experimentation has been conducted to validate the effectiveness of these models for developing cost-effective algorithms and applications on existing hardware platforms. In this article a first attempt is made to perform a detailed experimental account on the preciseness of these models. To achieve this, three models (BSP, E-BSP, and BPRAM) were selected and validated on five parallel platforms (Cray T3E, Thinking Machines CM-5, Intel Paragon, MasPar MP-1, and Parsytec GCel). The work described in this article consists of three parts. First, the predictive capabilities of the models are investigated.
Unlike previous experimental work, which mostly demonstrated a close match between the measured and predicted execution times, this article shows that there are several situations in which the models do not precisely predict the actual runtime behavior of an algorithm implementation. Second, a comparison between the models is provided in order to determine the model that induces the most efficient algorithms. Lastly, the performance achieved by the model-derived algorithms is compared with the performance attained by machine-specific algorithms in order to examine the effectiveness of deriving fast algorithms through the formalisms of the models.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "experimentation; performance", subject = "{\bf C.1.4} Computer Systems Organization, PROCESSOR ARCHITECTURES, Parallel Architectures. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Modeling techniques. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Parallel programming.", } @Article{Bhatti:1998:CSC, author = "Nina T. Bhatti and Matti A. Hiltunen and Richard D. Schlichting and Wanda Chiu", title = "{Coyote}: a system for constructing fine-grain configurable communication services", journal = j-TOCS, volume = "16", number = "4", pages = "321--366", month = nov, year = "1998", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jul 26 16:27:34 MDT 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-4/p321-bhatti/", abstract = "Communication-oriented abstractions such as atomic multicast, group RPC, and protocols for location-independent mobile computing can simplify the development of complex applications built on distributed systems.
This article describes Coyote, a system that supports the construction of highly modular and configurable versions of such abstractions. Coyote extends the notion of protocol objects and hierarchical composition found in existing systems with support for finer-grain microprotocol objects and a nonhierarchical composition scheme for use within a single layer of a protocol stack. A customized service is constructed by selecting microprotocols based on their semantic guarantees and configuring them together with a standard runtime system to form a composite protocol implementing the service. This composite protocol is then composed hierarchically with other protocols to form a complete network subsystem. The overall approach is described and illustrated with examples of services that have been constructed using Coyote, including atomic multicast, group RPC, membership, and mobile computing protocols. A prototype implementation based on extending {\em x\/}-kernel version 3.2 running on Mach 3.0 with support for microprotocols is also presented, together with performance results from a suite of microprotocols from which over 60 variants of group RPC can be constructed.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "design; experimentation; performance; reliability", subject = "{\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols, Protocol architecture. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Distributed programming. {\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Network communication. {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.7} Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. 
{\bf D.2.13} Software, SOFTWARE ENGINEERING, Reusable Software.", } @Article{Epema:1998:DUS, author = "D. H. J. Epema", title = "Decay-usage scheduling in multiprocessors", journal = j-TOCS, volume = "16", number = "4", pages = "367--415", month = nov, year = "1998", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jul 26 16:27:34 MDT 1999", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-4/p367-epema/", abstract = "Decay-usage scheduling is a priority-aging time-sharing scheduling policy capable of dealing with a workload of both interactive and batch jobs by decreasing the priority of a job when it acquires CPU time, and by increasing its priority when it does not use the (a) CPU. In this article we deal with a decay-usage scheduling policy in multiprocessors modeled after widely used systems. The priority of a job consists of a base priority and a time-dependent component based on processor usage. Because the priorities in our model are time dependent, a queuing-theoretic analysis---for instance, for the mean job response time---seems impossible. Still, it turns out that as a consequence of the scheduling policy, the shares of the available CPU time obtained by jobs converge, and a deterministic analysis for these shares is feasible: We show how for a fixed set of jobs with large processing demands, the steady-state shares can be obtained given the base priorities, and conversely, how to set the base priorities given the required shares. In addition, we analyze the relation between the values of the scheduler parameters and the level of control it can exercise over the steady-state share ratios, and we deal with the rate of convergence.
We validate the model by simulations and by measurements of actual systems.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "measurement; performance", subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Multiprocessing/multiprogramming/multitasking. {\bf D.4.1} Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Measurements. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Modeling and prediction. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance, Simulation.", } @Article{Srinivasan:1999:FAL, author = "V. Srinivasan and G. Varghese", title = "Fast address lookups using controlled prefix expansion", journal = j-TOCS, volume = "17", number = "1", pages = "1--40", month = feb, year = "1999", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Sep 26 07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-1/p1-srinivasan/", abstract = "Internet (IP) address lookup is a major bottleneck in high-performance routers. IP address lookup is challenging because it requires a {\em longest matching prefix\/} lookup. It is compounded by increasing routing table sizes, increased traffic, higher-speed links, and the migration to 128-bit IPv6 addresses. We describe how IP lookups and updates can be made faster using a set of transformation techniques. Our main technique, {\em controlled prefix expansion}, transforms a set of prefixes into an equivalent set with fewer prefix lengths. In addition, we use optimization techniques based on dynamic programming, and local transformations of data structures to improve cache behavior. 
When applied to trie search, our techniques provide a range of algorithms ({\em Expanded Tries\/}) whose performance can be tuned. For example, using a processor with 1MB of L2 cache, search of the MaeEast database containing 38000 prefixes can be done in 3 L2 cache accesses. On a 300MHz Pentium II which takes 4 cycles for accessing the first word of the L2 cacheline, this algorithm has a worst-case search time of 180 nsec., a worst-case insert/delete time of 2.5 msec., and an average insert/delete time of 4 usec. Expanded tries provide faster search {\em and\/} faster insert/delete times than earlier lookup algorithms. When applied to Binary Search on Levels, our techniques improve worst-case search times by nearly a factor of 2 (using twice as much storage) for the MaeEast database. Our approach to algorithm design is based on measurements using the VTune tool on a Pentium to obtain dynamic clock cycle counts. Our techniques also apply to similar address lookup problems in other network protocols.", acknowledgement = ack-nhfb, generalterms = "Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "binary search on levels; controlled prefix expansion; expanded tries; Internet address lookup; longest-prefix match; multibit tries; router performance", subject = "Computer Systems Organization --- Computer-Communication Networks --- Local and Wide-Area Networks (C.2.5): {\bf Internet}; Computer Systems Organization --- Computer-Communication Networks --- Network Protocols (C.2.2): {\bf Routing protocols}; Computer Systems Organization --- Computer-Communication Networks --- Internetworking (C.2.6): {\bf Routers}", } @Article{Birman:1999:BM, author = "Kenneth P. 
Birman and Mark Hayden and Oznur Ozkasap and Zhen Xiao and Mihai Budiu and Yaron Minsky", title = "Bimodal multicast", journal = j-TOCS, volume = "17", number = "2", pages = "41--88", month = may, year = "1999", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Sep 26 07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-2/p41-birman/", abstract = "There are many methods for making a multicast protocol ``reliable.'' At one end of the spectrum, a reliable multicast protocol might offer atomicity guarantees, such as all-or-nothing delivery, delivery ordering, and perhaps additional properties such as virtually synchronous addressing. At the other are protocols that use local repair to overcome transient packet loss in the network, offering ``best effort'' reliability. Yet none of this prior work has treated stability of multicast delivery as a basic reliability property, such as might be needed in an internet radio, television, or conferencing application. This article looks at reliability with a new goal: development of a multicast protocol which is reliable in a sense that can be rigorously quantified and includes throughput stability guarantees. We characterize this new protocol as a ``bimodal multicast'' in reference to its reliability model, which corresponds to a family of bimodal probability distributions. Here, we introduce the protocol, provide a theoretical analysis of its behavior, review experimental results, and discuss some candidate applications. 
These confirm that bimodal multicast is reliable, scalable, and that the protocol provides remarkably stable delivery throughput.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", subject = "Computer Systems Organization --- Computer-Communication Networks --- Network Architecture and Design (C.2.1): {\bf Network communications}", } @Article{Diniz:1999:ESO, author = "Pedro C. Diniz and Martin C. Rinard", title = "Eliminating synchronization overhead in automatically parallelized programs using dynamic feedback", journal = j-TOCS, volume = "17", number = "2", pages = "89--132", month = may, year = "1999", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Sep 26 07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-2/p89-diniz/", abstract = "This article presents dynamic feedback, a technique that enables computations to adapt dynamically to different execution environments. A compiler that uses dynamic feedback produces several different versions of the same source code; each version uses a different optimization policy. The generated code alternately performs sampling phases and production phases. Each sampling phase measures the overhead of each version in the current environment. Each production phase uses the version with the least overhead in the previous sampling phase. The computation periodically resamples to adjust dynamically to changes in the environment. We have implemented dynamic feedback in the context of a parallelizing compiler for object-based programs. The generated code uses dynamic feedback to automatically choose the best synchronization optimization policy. 
Our experimental results show that the synchronization optimization policy has a significant impact on the overall performance of the computation, that the best policy varies from program to program, that the compiler is unable to statically choose the best policy, and that dynamic feedback enables the generated code to exhibit performance that is comparable to that of code that has been manually tuned to use the best policy. We have also performed a theoretical analysis which provides, under certain assumptions, a guaranteed optimality bound for dynamic feedback relative to a hypothetical (and unrealizable) optimal algorithm that uses the best policy at every point during the execution.", acknowledgement = ack-nhfb, generalterms = "Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "parallel computing; parallelizing compilers", subject = "Computer Systems Organization --- Performance of Systems (C.4): {\bf Measurement techniques}; Software --- Programming Techniques --- Concurrent Programming (D.1.3); Software --- Programming Techniques --- Object-oriented Programming (D.1.5); Software --- Programming Languages --- Processors (D.3.4): {\bf Code generation}; Software --- Programming Languages --- Processors (D.3.4): {\bf Compilers}; Software --- Programming Languages --- Processors (D.3.4): {\bf Optimization}; Software --- Programming Languages --- Processors (D.3.4): {\bf Run-time environments}; Software --- Programming Techniques --- Concurrent Programming (D.1.3): {\bf Parallel programming}; Computer Systems Organization --- Performance of Systems (C.4): {\bf Design studies}", } @Article{Ronsse:1999:RFI, author = "Michiel Ronsse and Koen {De Bosschere}", title = "{RecPlay}: a fully integrated practical record\slash replay system", journal = j-TOCS, volume = "17", number = "2", pages = "133--152", month = may, year = "1999", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", 
bibdate = "Tue Sep 26 07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-2/p133-ronsse/", abstract = "This article presents a practical solution for the cyclic debugging of nondeterministic parallel programs. The solution consists of a combination of record\slash replay with automatic on-the-fly data race detection. This combination enables us to limit the record phase to the more efficient recording of the synchronization operations, while deferring the time-consuming data race detection to the replay phase. As the record phase is highly efficient, there is no need to switch it off, hereby eliminating the possibility of Heisenbugs because tracing can be left on all the time. This article describes an implementation of the tools needed to support RecPlay.", acknowledgement = ack-nhfb, generalterms = "Algorithms; Experimentation; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "binary code modification; multithreaded programming; race detection", subject = "Software --- Programming Techniques --- Concurrent Programming (D.1.3): {\bf Parallel programming}; Software --- Software Engineering --- Testing and Debugging (D.2.5): {\bf Debugging aids}; Software --- Software Engineering --- Testing and Debugging (D.2.5): {\bf Monitors}; Software --- Software Engineering --- Testing and Debugging (D.2.5): {\bf Tracing}; Software --- Operating Systems --- Process Management (D.4.1): {\bf Concurrency}; Software --- Operating Systems --- Process Management (D.4.1): {\bf Deadlocks}; Software --- Operating Systems --- Process Management (D.4.1): {\bf Multiprocessing/multiprogramming/multitasking}; Software --- Operating Systems --- Process Management (D.4.1): {\bf Mutual exclusion}; Software --- Operating Systems --- Process Management (D.4.1): {\bf Synchronization}", } @Article{Amsaleg:1999:GCC, 
author = "Laurent Amsaleg and Michael J. Franklin and Olivier Gruber", title = "Garbage collection for a client-server persistent object store", journal = j-TOCS, volume = "17", number = "3", pages = "153--201", month = aug, year = "1999", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Sep 26 07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-3/p153-amsaleg/", abstract = "We describe an efficient server-based algorithm for garbage collecting persistent object stores in a client-server environment. The algorithm is incremental and runs concurrently with client transactions. Unlike previous algorithms, it does not hold any transactional locks on data and does not require callbacks to clients. It is fault-tolerant, but performs very little logging. The algorithm has been designed to be integrated into existing systems, and therefore it works with standard implementation techniques such as Two-Phase Locking and Write-Ahead-Logging. In addition, it supports client-server performance optimizations such as client caching and flexible management of client buffers. 
We describe an implementation of the algorithm in the EXODUS storage manager and present the results of a performance study of the implementation.", acknowledgement = ack-nhfb, generalterms = "Algorithms; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "client-server system; logging; persistent object-store; recovery", subject = "Software --- Operating Systems --- Storage Management (D.4.2): {\bf Garbage collection}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Object-oriented databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Transaction processing}", } @Article{Raghavachari:1999:ALP, author = "Mukund Raghavachari and Anne Rogers", title = "{Ace}: a language for parallel programming with customizable protocols", journal = j-TOCS, volume = "17", number = "3", pages = "202--248", month = aug, year = "1999", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Sep 26 07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-3/p202-raghavachari/", abstract = "Customizing the protocols that manage accesses to different data structures within an application can improve the performance of software shared-memory programs substantially. Existing systems for using customizable protocols are hard to use directly because the mechanisms they provide for manipulating protocols are low-level ones. This article is an in-depth study of the issues involved in providing language support for application-specific protocols. We describe the design and implementation of a new language for parallel programming, Ace, that integrates support for customizable protocols with minimal extensions to C. 
Ace applications are developed using a shared-memory model with a default sequentially consistent protocol. Performance can then be optimized, with minor modifications to the application, by experimenting with different protocol libraries. The design of Ace was driven by a detailed study of the use of customizable protocols. We delineate the issues that arise when programming with customizable protocols and present novel abstractions that allow for their easy use. We describe the design and implementation of a runtime system and compiler for Ace and discuss compiler optimizations that improve the performance of such software shared-memory systems. We study the communication patterns of a set of benchmark applications and consider the use of customizable protocols to optimize their performance. We evaluate the performance of our system through experiments on a Thinking Machine CM-5 and a Cray T3E. We also present measurements that demonstrate that Ace has good performance compared to that of a modern distributed shared-memory system.", acknowledgement = ack-nhfb, generalterms = "Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "parallel processing", subject = "Software --- Programming Languages --- Language Constructs and Features (D.3.3); Software --- Programming Languages --- Processors (D.3.4): {\bf Compilers}; Software --- Programming Languages --- Processors (D.3.4): {\bf Run-time environments}; Software --- Programming Languages --- Language Classifications (D.3.2); Software --- Programming Techniques --- Concurrent Programming (D.1.3): {\bf Parallel programming}", } @Article{Hari:1999:APS, author = "Adiseshu Hari and George Varghese and Guru Parulkar", title = "An architecture for packet-striping protocols", journal = j-TOCS, volume = "17", number = "4", pages = "249--287", month = nov, year = "1999", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Sep 26 
07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/articles/journals/tocs/1999-17-4/p249-hari/p249-hari.pdf; http://www.acm.org/pubs/citations/journals/tocs/1999-17-4/p249-hari/", abstract = "Link-striping algorithms are often used to overcome transmission bottlenecks in computer networks. Traditional striping algorithms suffer from two major disadvantages. They provide inadequate load sharing in the presence of variable-length packets, and may result in non-FIFO delivery of data. We describe a new family of link-striping algorithms that solves both problems. Our scheme applies to any layer that can provide multiple FIFO channels. We deal with variable-sized packets by showing how fair-queuing algorithms can be transformed into load-sharing algorithms. Our transformation results in practical load-sharing protocols, and shows a theoretical connection between two seemingly different problems. The same transformation can be applied to obtain load-sharing protocols for links with different capacities. We deal with the FIFO requirement for two separate cases. If a sequence number can be added to each packet, we show how to speed up packet processing by letting the receiver simulate the sender algorithm. If no header can be added, we show how to provide quasi FIFO delivery. Quasi FIFO is FIFO except during occasional periods of loss of synchronization. We argue that quasi FIFO is adequate for most applications. We also describe a simple technique for speedy restoration of synchronization in the event of loss. We develop an architectural framework for transparently embedding our protocol at the network level by striping IP packets across multiple physical interfaces. The resulting stripe protocol has been implemented within the NetBSD kernel. 
Our measurements and simulations show that the protocol offers scalable throughput even when striping is done over dissimilar links, and that the protocol synchronizes quickly after packet loss. Measurements show performance improvements over conventional round-robin striping schemes and striping schemes that do not resequence packets. Some aspects of our solution have been implemented in Cisco's router operating system (IOS 11.3) in the context of Multilink PPP striping.", acknowledgement = ack-nhfb, generalterms = "Algorithms; Design; Measurement; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "causal fair queuing; fair queuing; load sharing; multilink PPP; packet striping; stripe protocol; striping", subject = "Computer Systems Organization --- Computer-Communication Networks --- Network Protocols (C.2.2): {\bf Protocol architecture}", } @Article{McKinley:1999:QLN, author = "Kathryn S. McKinley and Olivier Temam", title = "Quantifying loop nest locality using {SPEC'95} and the {Perfect} benchmarks", journal = j-TOCS, volume = "17", number = "4", pages = "288--336", month = nov, year = "1999", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Sep 26 07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-4/p288-mckinley/", abstract = "This article analyzes and quantifies the locality characteristics of numerical loop nests in order to suggest future directions for architecture and software cache optimizations. Since most programs spend the majority of their time in nests, the vast majority of cache optimization techniques target loop nests. In contrast, the locality characteristics that drive these optimizations are usually collected across the entire application rather than at the nest level. 
Researchers have studied numerical codes for so long that a number of commonly held assertions have emerged on their locality characteristics. In light of these assertions, we use the SPEC'95 and Perfect Benchmarks to take a new look at measuring locality on numerical codes based on references, loop nests, and program locality properties. Our results show that several popular assertions are at best overstatements. For example, although most reuse is within a loop nest, in line with popular assertions, most misses are internest capacity misses, and they correspond to potential reuse between nearby loop nests. In addition, we find that temporal and spatial reuse have balanced roles within a loop nest and that most reuse across nests and the entire program is temporal. These results are consistent with high hit rates (80\% or more hits), but go against the commonly held assumption that spatial reuse dominates. Our locality measurements reveal important differences between loop nests and programs, refute some popular assertions, and provide new insights for the compiler writer and the architect.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", subject = "Computer Systems Organization --- Performance of Systems (C.4): {\bf Performance attributes}; Computer Systems Organization --- Performance of Systems (C.4): {\bf Measurement techniques}", } @Article{Rinard:1999:EFG, author = "Martin C. 
Rinard", title = "Effective fine-grain synchronization for automatically parallelized programs using optimistic synchronization primitives", journal = j-TOCS, volume = "17", number = "4", pages = "337--371", month = nov, year = "1999", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Sep 26 07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-4/p337-rinard/", abstract = "This article presents our experience using optimistic synchronization to implement fine-grain atomic operations in the context of a parallelizing compiler for irregular, object-based computations. Our experience shows that the synchronization requirements of these programs differ significantly from those of traditional parallel computations, which use loop nests to access dense matrices using affine access functions. In addition to coarse-grain barrier synchronization, our irregular computations require synchronization primitives that support efficient fine-grain atomic operations. The standard implementation mechanism for atomic operations uses mutual exclusion locks. But the overhead of acquiring and releasing locks can reduce the performance. Locks can also consume significant amounts of memory. Optimistic synchronization primitives such as {\em load-linked/store conditional\/} are an attractive alternative. They require no additional memory and eliminate the use of heavyweight blocking synchronization constructs. We evaluate the effectiveness of optimistic synchronization by comparing experimental results from two versions of a parallelizing compiler for irregular, object-based computations. One version generates code that uses mutual exclusion locks to make operations execute atomically. 
The other version uses optimistic synchronization. We used this compiler to automatically parallelize three irregular, object-based benchmark applications of interest to the scientific and engineering computation community. The presented experimental results indicate that the use of optimistic synchronization in this context can significantly reduce the memory consumption and improve the overall performance.", acknowledgement = ack-nhfb, generalterms = "Algorithms; Experimentation; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "atomic operations; commutativity analysis; optimistic synchronization; parallel computing; parallelizing compilers; synchronization", subject = "Software --- Programming Languages --- Processors (D.3.4): {\bf Compilers}", } @Article{Keleher:2000:HLA, author = "Peter J. Keleher", title = "A high-level abstraction of shared accesses", journal = j-TOCS, volume = "18", number = "1", pages = "1--36", month = feb, year = "2000", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Sep 26 07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-1/p1-keleher/", abstract = "We describe the design and use of the {\em tape\/} mechanism, a new high-level abstraction of accesses to shared data for software DSMs. Tapes consolidate and generalize a number of recent protocol optimizations, including update-based locks and recorded-replay barriers. Tapes are usually created by ``recording'' shared accesses. The resulting recordings can be used to anticipate future accesses by tailoring data movement to application semantics. Tapes-based mechanisms are layered on top of existing shared-memory protocols, and are largely independent of the underlying memory model. 
Tapes can also be used to emulate the data-movement semantics of several update-based protocol implementations, without altering the underlying protocol implementation. We have used tapes to create the Tapeworm synchronization library. Tapeworm implements sophisticated record-replay mechanisms across barriers, augments locks with data-movement semantics, and allows the use of producer-consumer segments, which move entire modified segments when any portion of the segment is accessed. We show that Tapeworm eliminates 85\% of remote misses, reduces message traffic by 63\%, and improves performance by an average of 29\% for our application suite.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "DSM; programming libraries; shared memory; update protocols", subject = "Software --- Operating Systems --- Storage Management (D.4.2); Software --- Operating Systems --- File Systems Management (D.4.3); Software --- Operating Systems --- File Systems Management (D.4.3): {\bf Access methods}; Software --- Operating Systems --- File Systems Management (D.4.3): {\bf Distributed file systems}", } @Article{Pai:2000:ILU, author = "Vivek S. Pai and Peter Druschel and Willy Zwaenepoel", title = "{IO-Lite}: a unified {I/O} buffering and caching system", journal = j-TOCS, volume = "18", number = "1", pages = "37--66", month = feb, year = "2000", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Sep 26 07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-1/p37-pai/", abstract = "This article presents the design, implementation, and evaluation of IO-Lite, a unified I/O buffering and caching system for general-purpose operating systems. IO-Lite unifies {\em all\/} buffering and caching in the system, to the extent permitted by the hardware. 
In particular, it allows applications, the interprocess communication system, the file system, the file cache, and the network subsystem to safely and concurrently share a single physical copy of the data. Protection and security are maintained through a combination of access control and read-only sharing. IO-Lite eliminates all copying and multiple buffering of I/O data, and enables various cross-subsystem optimizations. Experiments with a Web server show performance improvements between 40 and 80\% on real workloads as a result of IO-Lite.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "caching; I/O buffering; networking; zero-copy", subject = "Software --- Operating Systems --- Communications Management (D.4.4); Software --- Operating Systems --- Performance (D.4.8)", } @Article{Schwartz:2000:SPA, author = "Beverly Schwartz and Alden W. Jackson and W. Timothy Strayer and Wenyi Zhou and R. Dennis Rockwell and Craig Partridge", title = "Smart packets: applying active networks to network management", journal = j-TOCS, volume = "18", number = "1", pages = "67--88", month = feb, year = "2000", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Sep 26 07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-1/p67-schwartz/", abstract = "This article introduces Smart Packets and describes the Smart Packets architecture, the packet formats, the language and its design goals, and security considerations. Smart Packets is an Active Networks project focusing on applying active networks technology to network management and monitoring. Messages in active networks are programs that are executed at nodes on the path to one or more target hosts. 
Smart Packets programs are written in a tightly encoded, safe language specifically designed to support network management and avoid dangerous constructs and accesses. Smart Packets improves the management of large complex networks by (1) moving management decision points closer to the node being managed, (2) targeting specific aspects of the node for information rather than exhaustive collection via polling, and (3) abstracting the management concepts to language constructs, allowing nimble network control.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "active networks", subject = "Computer Systems Organization --- Computer-Communication Networks --- Network Architecture and Design (C.2.1); Computer Systems Organization --- Computer-Communication Networks --- Network Operations (C.2.3); Software --- Programming Languages --- Language Constructs and Features (D.3.3)", } @Article{Brooks:2000:VBC, author = "David Brooks and Margaret Martonosi", title = "Value-based clock gating and operation packing: dynamic strategies for improving processor power and performance", journal = j-TOCS, volume = "18", number = "2", pages = "89--126", month = may, year = "2000", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Sep 26 07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-2/p89-brooks/", abstract = "The large address space needs of many current applications have pushed processor designs toward 64-bit word widths. Although full 64-bit addresses and operations are indeed sometimes needed, arithmetic operations on much smaller quantities are still more common. In fact, another instruction set trend has been the introduction of instructions geared toward subword operations on 16-bit quantities. 
For example, most major processors now include instruction set support for multimedia operations allowing parallel execution of several subword operations in the same ALU. This article presents our observations demonstrating that operations on ``narrow-width'' quantities are common not only in multimedia codes, but also in more general workloads. In fact, across the SPECint95 benchmarks, over half the integer operation executions require 16 bits or less. Based on this data, we propose two hardware mechanisms that dynamically recognize and capitalize on these narrow-width operations. The first, power-oriented optimization reduces processor power consumption by using operand-value-based clock gating to turn off portions of arithmetic units that will be unused by narrow-width operations. This optimization results in a 45\%--60\% reduction in the integer unit's power consumption for the SPECint95 and MediaBench benchmark suites. Applying this optimization to SPECfp95 benchmarks results in slightly smaller power reductions, but still seems warranted. These reductions in integer unit power consumption equate to a 5\%--10\% full-chip power savings. Our second, performance-oriented optimization improves processor performance by packing together narrow-width operations so that they share a single arithmetic unit. Conceptually similar to a dynamic form of MMX, this optimization offers speedups of 4.3\%--6.2\% for SPECint95 and 8.0\%--10.4\% for MediaBench. \par Overall, these optimizations highlight an increasing opportunity for value-based optimizations to improve both power and performance in current microprocessors.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", subject = "Hardware --- Arithmetic and Logic Structures (B.2); Computer Systems Organization --- Processor Architectures --- Single Data Stream Architectures (C.1.1): {\bf RISC/CISC, VLIW architectures}", } @Article{Ganger:2000:SUS, author = "Gregory R. 
Ganger and Marshall Kirk McKusick and Craig A. N. Soules and Yale N. Patt", title = "Soft updates: a solution to the metadata update problem in file systems", journal = j-TOCS, volume = "18", number = "2", pages = "127--153", month = may, year = "2000", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Sep 26 07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-2/p127-ganger/", abstract = "Metadata updates, such as file creation and block allocation, have consistently been identified as a source of performance, integrity, security, and availability problems for file systems. Soft updates is an implementation technique for low-cost sequencing of fine-grained updates to write-back cache blocks. Using soft updates to track and enforce metadata update dependencies, a file system can safely use delayed writes for almost all file operations. This article describes soft updates, their incorporation into the 4.4BSD fast file system, and the resulting effects on the system. We show that a disk-based file system using soft updates achieves memory-based file system performance while providing stronger integrity and security guarantees than most disk-based file systems. For workloads that frequently perform updates on metadata (such as creating and deleting files), this improves performance by more than a factor of two and up to a factor of 20 when compared to the conventional synchronous write approach, and by 4--19\% when compared to an aggressive write-ahead logging approach. 
In addition, soft updates can improve file system availability by relegating crash-recovery assistance (e.g., the {\em fsck\/} utility) to an optional and background role, reducing file system recovery time to less than one second.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", subject = "Computer Systems Organization --- Performance of Systems (C.4): {\bf Design studies}; Computer Systems Organization --- Performance of Systems (C.4): {\bf Reliability, availability, and serviceability}; Computer Systems Organization --- Computer System Implementation --- Servers (C.5.5); Software --- Operating Systems --- Storage Management (D.4.2); Software --- Operating Systems --- File Systems Management (D.4.3); Data --- Files (E.5); Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2)", } @Article{Yeung:2000:MSM, author = "Donald Yeung and John Kubiatowicz and Anant Agarwal", title = "Multigrain shared memory", journal = j-TOCS, volume = "18", number = "2", pages = "154--196", month = may, year = "2000", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Sep 26 07:54:31 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-2/p154-yeung/", abstract = "Parallel workstations, each comprising tens of processors based on shared memory, promise cost-effective scalable multiprocessing. This article explores the coupling of such small- to medium-scale shared-memory multiprocessors through software over a local area network to synthesize larger shared-memory systems. We call these systems Distributed Shared-memory MultiProcessors (DSMPs). 
This article introduces the design of a shared-memory system that uses multiple granularities of sharing, called MGS, and presents a prototype implementation of MGS on the MIT Alewife multiprocessor. Multigrain shared memory enables the collaboration of hardware and software shared memory, thus synthesizing a single transparent shared-memory address space across a cluster of multiprocessors. The system leverages the efficient support for fine-grain cache-line sharing within multiprocessor nodes as often as possible, and resorts to coarse-grain page-level sharing across nodes only when absolutely necessary. Using our prototype implementation of MGS, an in-depth study of several shared-memory applications is conducted to understand the behavior of DSMPs. Our study is the first to comprehensively explore the DSMP design space, and to compare the performance of DSMPs against all-software and all-hardware DSMs on a single experimental platform. Keeping the total number of processors fixed, we show that applications execute up to 85\% faster on a DSMP as compared to an all-software DSM. We also show that all-hardware DSMs hold a significant performance advantage over DSMPs on challenging applications, between 159\% and 1014\%. 
However, program transformations to improve data locality for these applications allow DSMPs to almost match the performance of an all-hardware multiprocessor of the same size.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", subject = "Hardware --- Memory Structures --- Design Styles (B.3.2): {\bf Shared memory}; Computer Systems Organization --- Processor Architectures --- Multiple Data Stream Architectures (Multiprocessors) (C.1.2)", } @Article{Aron:2000:STE, author = "Mohit Aron and Peter Druschel", title = "Soft timers: efficient microsecond software timer support for network processing", journal = j-TOCS, volume = "18", number = "3", pages = "197--228", year = "2000", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jul 18 10:18:45 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/articles/journals/tocs/2000-18-3/p197-aron/p197-aron.pdf; http://www.acm.org/pubs/citations/journals/tocs/2000-18-3/p197-aron/", abstract = "This paper proposes and evaluates soft timers, a new operating system facility that allows the efficient scheduling of software events at a granularity down to tens of microseconds. Soft timers can be used to avoid interrupts and reduce context switches associated with network processing, without sacrificing low communication delays. More specifically, soft timers enable transport protocols like TCP to efficiently perform rate-based clocking of packet transmissions. Experiments indicate that soft timers allow a server to employ rate-based clocking with little CPU overhead (2-6\%) at high aggregate bandwidths. Soft timers can also be used to perform network polling, which eliminates network interrupts and increases the memory access locality of the network subsystem without sacrificing delay. 
Experiments show that this technique can improve the throughput of a Web server by up to 25\%.", acknowledgement = ack-nhfb, generalterms = "Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "polling; timers; transmission scheduling", subject = "Computer Systems Organization --- Computer System Implementation --- Servers (C.5.5); Software --- Operating Systems --- Process Management (D.4.1): {\bf Scheduling}; Software --- Operating Systems --- Communications Management (D.4.4): {\bf Network communication}", } @Article{Govil:2000:CDR, author = "Kingshuk Govil and Dan Teodosiu and Yongqiang Huang and Mendel Rosenblum", title = "Cellular {Disco}: resource management using virtual clusters on shared-memory multiprocessors", journal = j-TOCS, volume = "18", number = "3", pages = "229--262", year = "2000", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Nov 13 18:22:48 MST 2000", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-3/p229-govil/", abstract = "Despite the fact that large-scale shared-memory multiprocessors have been commercially available for several years, system software that fully utilizes all their features is still not available, mostly due to the complexity and cost of making the required changes to the operating system. A recently proposed approach, called Disco, substantially reduces this development cost by using a virtual machine monitor that leverages the existing operating system technology. In this paper we present a system called Cellular Disco that extends the Disco work to provide all the advantages of the hardware partitioning and scalable operating system approaches. We argue that Cellular Disco can achieve these benefits at only a small fraction of the development cost of modifying the operating system.
Cellular Disco effectively turns a large-scale shared-memory multiprocessor into a virtual cluster that supports fault containment and heterogeneity, while avoiding operating system scalability bottlenecks. Yet at the same time, Cellular Disco preserves the benefits of a shared-memory multiprocessor by implementing dynamic, fine-grained resource sharing, and by allowing users to overcommit resources such as processors and memory. This hybrid approach requires a scalable resource manager that makes local decisions with limited information while still providing good global performance and fault containment. In this paper we describe our experience with a Cellular Disco prototype on a 32-processor SGI Origin 2000 system. We show that the execution time penalty for this approach is low, typically within 10\% of the best available commercial operating system for most workloads, and that it can manage the CPU and memory resources of the machine significantly better than the hardware partitioning approach.", acknowledgement = ack-nhfb, generalterms = "Design; Management; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "fault containment; resource management; scalable multiprocessors; virtual machines", subject = "Software --- Operating Systems --- Process Management (D.4.1); Software --- Operating Systems --- Storage Management (D.4.2); Software --- Operating Systems --- Reliability (D.4.5); Computer Systems Organization --- Processor Architectures (C.1)", } @Article{Kohler:2000:CMR, author = "Eddie Kohler and Robert Morris and Benjie Chen and John Jannotti and M.
Frans Kaashoek", title = "The {Click} modular router", journal = j-TOCS, volume = "18", number = "3", pages = "263--297", year = "2000", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Nov 13 18:22:48 MST 2000", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-3/p263-kohler/", abstract = "Click is a new software architecture for building flexible and configurable routers. A Click router is assembled from packet processing modules called {\em elements}. Individual elements implement simple router functions like packet classification, queuing, scheduling, and interfacing with network devices. A router configuration is a directed graph with elements at the vertices; packets flow along the edges of the graph. Several features make individual elements more powerful and complex configurations easier to write, including {\em pull connections}, which model packet flow driven by transmitting hardware devices, and {\em flow-based router context}, which helps an element locate other interesting elements. Click configurations are modular and easy to extend. A standards-compliant Click IP router has 16 elements on its forwarding path; some of its elements are also useful in Ethernet switches and IP tunnelling configurations. Extending the IP router to support dropping policies, fairness among flows, or Differentiated Services simply requires adding a couple of elements at the right place.
On conventional PC hardware, the Click IP router achieves a maximum loss-free forwarding rate of 333,000 64-byte packets per second, demonstrating that Click's modular and flexible architecture is compatible with good performance.", acknowledgement = ack-nhfb, generalterms = "Design; Management; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "component systems; routers; software router performance", subject = "Computer Systems Organization --- Computer-Communication Networks --- Network Architecture and Design (C.2.1): {\bf Packet-switching networks}; Computer Systems Organization --- Computer-Communication Networks --- Internetworking (C.2.6): {\bf Routers}; Software --- Software Engineering --- Software Architectures (D.2.11): {\bf Domain-specific architectures}", } @Article{Saito:2000:MAP, author = "Yasushi Saito and Brian N. Bershad and Henry M. Levy", title = "Manageability, availability, and performance in {Porcupine}: a highly scalable, cluster-based mail service", journal = j-TOCS, volume = "18", number = "3", pages = "298--332", year = "2000", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Nov 13 18:22:48 MST 2000", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-3/p298-saito/", abstract = "This paper describes the motivation, design and performance of Porcupine, a scalable mail server. The goal of Porcupine is to provide a highly available and scalable electronic mail service using a large cluster of commodity PCs. We designed Porcupine to be easy to manage by emphasizing dynamic load balancing, automatic configuration, and graceful degradation in the presence of failures.
Key to the system's manageability, availability, and performance is that sessions, data, and underlying services are distributed homogeneously and dynamically across nodes in a cluster.", acknowledgement = ack-nhfb, generalterms = "Algorithms; Management; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "cluster; distributed systems; email; group membership protocol; load balancing; replication", subject = "Computer Systems Organization --- Computer-Communication Networks --- Distributed Systems (C.2.4): {\bf Distributed applications}; Computer Systems Organization --- Performance of Systems (C.4): {\bf Reliability, availability, and serviceability}; Computer Systems Organization --- Computer System Implementation --- Servers (C.5.5); Software --- Operating Systems --- Reliability (D.4.5): {\bf Fault-tolerance}; Information Systems --- Information Storage and Retrieval --- Systems and Software (H.3.4): {\bf Distributed systems}; Information Systems --- Information Systems Applications --- Communications Applications (H.4.3): {\bf Electronic mail}", } @Article{Gontmakher:2000:JCN, author = "Alex Gontmakher and Assaf Schuster", title = "{Java} consistency: nonoperational characterizations for {Java} memory behavior", journal = j-TOCS, volume = "18", number = "4", pages = "333--386", year = "2000", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jul 18 10:18:45 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/articles/journals/tocs/2000-18-4/p333-gontmakher/p333-gontmakher.pdf; http://www.acm.org/pubs/citations/journals/tocs/2000-18-4/p333-gontmakher/", abstract = "The Java Language Specification (JLS) [Gosling et al. 1996] provides an operational definition for the consistency of shared variables. 
The definition remains unchanged in the JLS 2nd edition, currently under peer review, which relies on a specific abstract machine as its underlying model, is very complicated. Several subsequent works have tried to simplify and formalize it. However, these revised definitions are also operational, and thus have failed to highlight the intuition behind the original specification. In this work we provide a complete nonoperational specification for Java and for the JVM, excluding synchronized operations. We provide a simpler definition, in which we clearly distinguish the consistency model that is promised to the programmer from that which should be implemented in the JVM. This distinction, which was implicit in the original definition, is crucial for building the JVM. We find that the programmer model is strictly weaker than that of the JVM, and precisely define their discrepancy. Moreover, our definition is independent of any specific (or even abstract) machine, and can thus be used to verify JVM implementations and compiler optimizations on any platform. Finally, we show the precise range of consistency relaxations obtainable for the Java memory model when a certain compiler optimization-- called {\em prescient stores\/} in JLS--is applicable.", acknowledgement = ack-nhfb, generalterms = "Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "Java memory models; multithreading; nonoperational specification", subject = "Hardware --- Memory Structures --- Performance Analysis and Design Aids** (B.3.3): {\bf Formal models**}", } @Article{Sarkar:2000:HBC, author = "Prasenjit Sarkar and John H. 
Hartman", title = "Hint-based cooperative caching", journal = j-TOCS, volume = "18", number = "4", pages = "387--419", year = "2000", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jul 18 10:18:45 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/articles/journals/tocs/2000-18-4/p387-sarkar/p387-sarkar.pdf; http://www.acm.org/pubs/citations/journals/tocs/2000-18-4/p387-sarkar/", abstract = "This article presents the design, implementation, and measurement of a hint-based cooperative caching file system. Hints allow clients to make decisions based on local state, enabling a loosely coordinated system that is simple to implement. The resulting performance is comparable to that of existing tightly coordinated algorithms that use global state, but with less overhead. Simulations show that the block access times of our system are as good as those of the existing algorithms, while reducing manager load by more than a factor of seven, block lookup traffic by nearly a factor of two-thirds, and replacement traffic a factor of five. To verify our simulation results in a real system with real users, we implemented a prototype and measured its performance for one week. Although the simulation and prototype environments were very different, the prototype system mirrored the simulation results by exhibiting reduced overhead and high hint accuracy. 
Furthermore, hint-based cooperative caching reduced the average block access time to almost half that of NFS.", acknowledgement = ack-nhfb, generalterms = "Algorithms; Design; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "cooperative caching; hints", subject = "Software --- Operating Systems --- File Systems Management (D.4.3)", } @Article{Bilas:2001:ASV, author = "Angelos Bilas and Dongming Jiang and Jaswinder Pal Singh", title = "Accelerating shared virtual memory via general-purpose network interface support", journal = j-TOCS, volume = "19", number = "1", pages = "1--35", year = "2001", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jul 18 10:18:45 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/articles/journals/tocs/2001-19-1/p1-bilas/p1-bilas.pdf; http://www.acm.org/pubs/citations/journals/tocs/2001-19-1/p1-bilas/", abstract = "Clusters of symmetric multiprocessors (SMPs) are important platforms for high-performance computing. With the success of hardware cache-coherent distributed shared memory (DSM), a lot of effort has also been made to support the coherent shared-address-space programming model in software on clusters. Much research has been done in fast communication on clusters and in protocols for supporting software shared memory across them. However, the performance of software virtual memory (SVM) is still far from that achieved on hardware DSM systems. The goal of this paper is to improve the performance of SVM on system area network clusters by considering communication and protocol layer interactions. 
We first examine what are the important communication system bottlenecks that stand in the way of improving parallel performance of SVM clusters; in particular, which parameters of the communication architecture are most important to improve further relative to processor speed, which ones are already adequate on modern systems for most applications, and how will this change with technology in the future. We find that the most important communication subsystem cost to improve is the overhead of generating and delivering interrupts for asynchronous protocol processing. Then we proceed to show, that by providing simple and general support for asynchronous message handling in a commodity network interface (NI) and by altering SVM protocols appropriately, protocol activity can be decoupled from asynchronous message handling, and the need for interrupts or polling can be eliminated. The NI mechanisms needed are generic, not SVM-dependent. We prototype the mechanisms and such a {\em synchronous home-based LRC\/} protocol, called {\em GeNIMA\/} (GEneral-purpose Network Interface support for shared Memory Abstractions), on a cluster of SMPs with a programmable NI. We find that the performance improvements are substantial, bringing performance on a small-scale SMP cluster much closer to that of hardware-coherent shared memory for many applications, and we show the value of each of the mechanisms in different applications.", acknowledgement = ack-nhfb, generalterms = "Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "applications; clusters; shared virtual memory; system area networks", subject = "Computer Systems Organization --- Performance of Systems (C.4)", } @Article{Grimm:2001:SAC, author = "Robert Grimm and Brian N.
Bershad", title = "Separating access control policy, enforcement, and functionality in extensible systems", journal = j-TOCS, volume = "19", number = "1", pages = "36--70", year = "2001", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jul 18 10:18:45 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/articles/journals/tocs/2001-19-1/p36-grimm/p36-grimm.pdf; http://www.acm.org/pubs/citations/journals/tocs/2001-19-1/p36-grimm/", abstract = "Extensible systems, such as Java or the SPIN extensible operating system, allow for units of code, or extensions, to be added to a running system in almost arbitrary fashion. Extensions closely interact through low-latency but type-safe interfaces to form a tightly integrated system. As extensions can come from arbitrary sources, not all of whom can be trusted to conform to an organization's security policy, such structuring raises the question of how security constraints are enforced in an extensible system. In this paper, we present an access control mechanism for extensible systems to address this problem. Our access control mechanism decomposes access control into a policy-neutral enforcement manager and a security policy manager, and it is transparent to extensions in the absence of security violations. It structures the system into protection domains, enforces protection domains through access control checks, and performs auditing of system operations. The access control mechanism works by inspecting extensions for their types and operations to determine which abstractions require protection and by redirecting procedure or method invocations to inject access control operations into the system. 
We describe the design of this access control mechanism, present an implementation within the SPIN extensible operating systems, and provide a qualitative as well as quantitative evaluation of the mechanism.", acknowledgement = ack-nhfb, generalterms = "Security", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "access check; auditing; extensible systems; Java; policy-neutral enforcement; protection domain; protection domain transfer; security policy; SPIN", subject = "Software --- Operating Systems (D.4); Software --- Operating Systems --- General (D.4.0); Software --- Operating Systems --- Security and Protection (D.4.6): {\bf Access controls}", } @Article{Luk:2001:ACS, author = "Chi-Keung Luk and Todd C. Mowry", title = "Architectural and compiler support for effective instruction prefetching: a cooperative approach", journal = j-TOCS, volume = "19", number = "1", pages = "71--109", year = "2001", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jul 18 10:18:45 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/articles/journals/tocs/2001-19-1/p71-luk/p71-luk.pdf; http://www.acm.org/pubs/citations/journals/tocs/2001-19-1/p71-luk/", abstract = "Instruction cache miss latency is becoming an increasingly important performance bottleneck, especially for commercial applications. Although instruction prefetching is an attractive technique for tolerating this latency, we find that existing prefetching schemes are insufficient for modern superscalar processors, since they fail to issue prefetches early enough (particularly for nonsequential accesses). To overcome these limitations, we propose a new instruction prefetching technique whereby the hardware and software {\em cooperate\/} to hide the latency as follows. 
The hardware performs aggressive sequential prefetching combined with a novel {\em prefetch filtering\/} mechanism to allow it to get far ahead without polluting the cache. To hide the latency of nonsequential accesses, we propose and implement a novel compiler algorithm which automatically inserts {\em instruction-prefetch\/} instructions to prefetch the targets of control transfers far enough in advance. Our experimental results demonstrate that this new approach hides 50\% or more of the latency remaining with the best previous techniques, while at the same time reduces the number of useless prefetches by a factor of six. We find that both the {\em prefetch filtering\/} and {\em compiler-inserted prefetching\/} components of our design are essential and complementary, and that the compiler can limit the code expansion to only 9\% on average. In addition, we show that the performance of our technique can be further increased by using profiling information to help reduce cache conflicts and unnecessary prefetches. From an architectural perspective, these performance advantages are sustained over a range of common miss latencies and bandwidth. Finally, our technique is cost effective as well, since it delivers performance comparable to (or even better than) that of larger caches, but requires a much smaller hardware budget.", acknowledgement = ack-nhfb, generalterms = "Design; Experimentation; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "compiler optimization; instruction prefetching", subject = "Software --- Programming Languages --- Processors (D.3.4): {\bf Compilers}; Software --- Programming Languages --- Processors (D.3.4): {\bf Optimization}; Hardware --- Memory Structures --- Design Styles (B.3.2): {\bf Cache memories}", } @Article{Brown:2001:CBP, author = "Angela Demke Brown and Todd C.
Mowry and Orran Krieger", title = "Compiler-based {I/O} prefetching for out-of-core applications", journal = j-TOCS, volume = "19", number = "2", pages = "111--170", year = "2001", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jul 18 10:18:45 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/articles/journals/tocs/2001-19-2/p111-brown/p111-brown.pdf; http://www.acm.org/pubs/citations/journals/tocs/2001-19-2/p111-brown/", abstract = "Current operating systems offer poor performance when a numeric application's working set does not fit in main memory. As a result, programmers who wish to solve ``out-of-core'' problems efficiently are typically faced with the onerous task of rewriting an application to use explicit I/O operations (e.g., read/write). In this paper, we propose and evaluate a fully automatic technique which liberates the programmer from this task, provides high performance, and requires only minimal changes to current operating systems. In our scheme the compiler provides the crucial information on future access patterns without burdening the programmer; the operating system supports nonbinding {\em prefetch\/} and {\em release\/} hints for managing I/O; and the operating system cooperates with a run-time layer to accelerate performance by adapting to dynamic behavior and minimizing prefetch overhead. This approach maintains the abstraction of unlimited virtual memory for the programmer, gives the compiler the flexibility to aggressively insert prefetches ahead of references, and gives the operating system the flexibility to arbitrate between the competing resource demands of multiple applications. We implemented our compiler analysis within the SUIF compiler, and used it to target implementations of our run-time and OS support on both research and commercial systems (Hurricane and IRIX 6.5, respectively).
Our experimental results show large performance gains for out-of-core scientific applications on both systems: more than 50\% of the I/O stall time has been eliminated in most cases, thus translating into overall speedups of roughly twofold in many cases.", acknowledgement = ack-nhfb, generalterms = "Design; Experimentation; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "compiler optimization; prefetching; virtual memory", subject = "Software --- Operating Systems --- Storage Management (D.4.2): {\bf Virtual memory}; Software --- Operating Systems --- Performance (D.4.8); Software --- Programming Languages --- Processors (D.3.4): {\bf Compilers}; Software --- Programming Languages --- Processors (D.3.4): {\bf Optimization}", } @Article{Fekete:2001:SUP, author = "Alan Fekete and Nancy Lynch and Alex Shvartsman", title = "Specifying and using a partitionable group communication service", journal = j-TOCS, volume = "19", number = "2", pages = "171--216", year = "2001", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jul 18 10:18:45 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/articles/journals/tocs/2001-19-2/p171-fekete/p171-fekete.pdf; http://www.acm.org/pubs/citations/journals/tocs/2001-19-2/p171-fekete/", abstract = "Group communication services are becoming accepted as effective building blocks for the construction of fault-tolerant distributed applications. Many specifications for group communication services have been proposed. However, there is still no agreement about what these specifications should say, especially in cases where the services are {\em partitionable}, i.e., where communication failures may lead to simultaneous creation of groups with disjoint memberships, such that each group is unaware of the existence of any other group. 
In this paper, we present a new, succinct specification for a view-oriented partitionable group communication service. The service associates each message with a particular {\em view\/} of the group membership. All send and receive events for a message occur within the associated view. The service provides a total order on the messages within each view, and each processor receives a prefix of this order. Our specification separates safety requirements from performance and fault-tolerance requirements. The safety requirements are expressed by an abstract, global {\em state machine}. To present the performance and fault-tolerance requirements, we include {\em failure-status input actions\/} in the specification; we then give properties saying that consensus on the view and timely message delivery are guaranteed in an execution provided that the execution {\em stabilizes\/} to a situation in which the failure-status stops changing and corresponds to a consistently partitioned system. Because consensus is not required in every execution, the specification is not subject to the existing impossibility results for partitionable systems. Our specification has a simple implementation, based on the membership algorithm of Cristian and Schmuck. We show the utility of the specification by constructing an ordered-broadcast application, using an algorithm (based on algorithms of Amir, Dolev, Keidar, and others) that reconciles information derived from different instantiations of the group. The application manages the view-change activity to build a shared sequence of messages, i.e., the per-view total orders of the group service are combined to give a universal total order.
We prove the correctness and analyze the performance and fault-tolerance of the resulting application.", acknowledgement = ack-nhfb, generalterms = "Algorithms; Design; Performance; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "composable building blocks; conditional performance analysis; distributed algorithms; group communication protocols; message-passing protocols; ordered broadcast; service specification; total-order broadcast", subject = "Computer Systems Organization --- Computer-Communication Networks --- Distributed Systems (C.2.4); Software --- Operating Systems --- Reliability (D.4.5): {\bf Fault-tolerance}; Software --- Software Engineering --- Software/Program Verification (D.2.4): {\bf Correctness proofs}", } @Article{McNamee:2001:STT, author = "Dylan McNamee and Jonathan Walpole and Calton Pu and Crispin Cowan and Charles Krasic and Ashvin Goel and Perry Wagle and Charles Consel and Gilles Muller and Renauld Marlet", title = "Specialization tools and techniques for systematic optimization of system software", journal = j-TOCS, volume = "19", number = "2", pages = "217--251", year = "2001", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jul 18 10:18:45 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/articles/journals/tocs/2001-19-2/p217-mcnamee/p217-mcnamee.pdf; http://www.acm.org/pubs/citations/journals/tocs/2001-19-2/p217-mcnamee/", abstract = "Specialization has been recognized as a powerful technique for optimizing operating systems. However, specialization has not been broadly applied beyond the research community because current techniques based on manual specialization, are time-consuming and error-prone. The goal of the work described in this paper is to help operating system tuners perform specialization more easily. 
We have built a specialization toolkit that assists the major tasks of specializing operating systems. We demonstrate the effectiveness of the toolkit by applying it to three diverse operating system components. We show that using tools to assist specialization enables significant performance optimizations without error-prone manual modifications. Our experience with the toolkit suggests new ways of designing systems that combine high performance and clean structure.", acknowledgement = ack-nhfb, generalterms = "Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "operating system specialization; optimization; software architecture", subject = "Software --- Operating Systems --- Organization and Design (D.4.7)", } @Article{Mendelson:2001:ESC, author = "Avi Mendelson and Freddy Gabbay", title = "The effect of seance communication on multiprocessing systems", journal = j-TOCS, volume = "19", number = "2", pages = "252--281", year = "2001", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jul 18 10:18:45 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "http://www.acm.org/pubs/articles/journals/tocs/2001-19-2/p252-mendelson/p252-mendelson.pdf; http://www.acm.org/pubs/citations/journals/tocs/2001-19-2/p252-mendelson/", abstract = "This paper introduces the seance communication phenomenon and analyzes its effect on a multiprocessing environment. Seance communication is an unnecessary coherency-related activity that is associated with dead cache information. Dead information may reside in the cache for various reasons: task migration, context switches, or working-set changes. Dead information does not have a significant performance impact on a single-processor system; however, it can dominate the performance of a multicache environment.
In order to evaluate the overhead of seance communication, we develop an analytical model that is based on the fractal behavior of the memory references. So far, all previous works that used the same modeling approach extracted the fractal parameters of a program manually. This paper provides an additional important contribution by demonstrating how these parameters can be automatically extracted from the program trace. Our analysis indicates that seance communication may severely reduce the overall system performance when using write-update or write-invalidate cache coherency protocols. In addition, we find that the performance of write-update protocols is affected more severely than write-invalidate protocols. The results that are provided by our model are important for better understanding of the coherency-related overhead in multicache systems and for better development of parallel applications and operating systems.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Experimentation; Measurement; Performance",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords = "cache coherency protocols; multicache systems; performance analysis; seance communication",
  subject = "Hardware --- Memory Structures (B.3); Hardware --- Memory Structures --- Design Styles (B.3.2): {\bf Cache memories}; Computer Systems Organization --- General (C.0); Computer Systems Organization --- Processor Architectures --- Multiple Data Stream Architectures (Multiprocessors) (C.1.2): {\bf Interconnection architectures}",
}

@Article{Arpaci-Dusseau:2001:ICC,
  author = "Andrea Carol Arpaci-Dusseau",
  title = "Implicit coscheduling: coordinated scheduling with implicit information in distributed systems",
  journal = j-TOCS,
  volume = "19",
  number = "3",
  pages = "283--331",
  month = aug,
  year = "2001",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Wed Jul 18 10:18:45 MDT 2001",
  bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL = "http://www.acm.org/pubs/citations/journals/tocs/2001-19-3/p283-arpaci-dusseau/",
  abstract = "In modern distributed systems, coordinated time-sharing is required for communicating processes to leverage the performance of switch-based networks and low-overhead protocols. Coordinated time-sharing has traditionally been achieved with gang scheduling or explicit coscheduling, implementations of which often suffer from many deficiencies: multiple points of failure, high context-switch overheads, and poor interaction with client-server, interactive, and I/O-intensive workloads. {\em Implicit coscheduling\/} dynamically coordinates communicating processes across distributed machines without these structural deficiencies. In implicit coscheduling, no communication is required across operating system schedulers; instead, cooperating processes achieve coordination by reacting to {\em implicit information\/} carried by communication existing within the parallel application. The implementation of this approach is simple and allows participating nodes to act autonomously. We introduce two key mechanisms in implicit coscheduling. The first is {\em conditional two-phase waiting}, a generalization of traditional two-phase waiting in which spin-time may be increased depending upon events occurring while the process waits. The second is an extension to stride scheduling that provides preemption and is fair to processes that block. To demonstrate that implicit coscheduling performs well, we show results from an extensive set of simulation and implementation experiments. To exercise the conditional two-phase waiting algorithm, we examine three workloads: bulk-synchronous and continuous-communication synthetic applications and application kernels written in the Split-C language. To exercise the local scheduler, we examine competing jobs with different communication characteristics.
We demonstrate that our implementation scales well with the number of jobs and workstations and is robust to process placement. Our experiments show that implicit coscheduling is effective and fair for a wide range of workloads; most perform within 30\% of an idealized model of gang scheduling.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Performance",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords = "clusters; coscheduling; gang scheduling; networks of workstations; proportional-share scheduling; two-phase waiting",
  subject = "Software --- Operating Systems --- Process Management (D.4.1): {\bf Scheduling}; Computer Systems Organization --- Computer-Communication Networks --- Distributed Systems (C.2.4): {\bf Network operating systems}",
}

@Article{Carzaniga:2001:DEW,
  author = "Antonio Carzaniga and David S. Rosenblum and Alexander L. Wolf",
  title = "Design and evaluation of a wide-area event notification service",
  journal = j-TOCS,
  volume = "19",
  number = "3",
  pages = "332--383",
  month = aug,
  year = "2001",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Wed Jul 18 10:18:45 MDT 2001",
  bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL = "http://www.acm.org/pubs/citations/journals/tocs/2001-19-3/p332-carzaniga/",
  abstract = "The components of a loosely coupled system are typically designed to operate by generating and responding to asynchronous events. An {\em event notification service\/} is an application-independent infrastructure that supports the construction of event-based systems, whereby generators of events publish event notifications to the infrastructure and consumers of events subscribe with the infrastructure to receive relevant notifications.
The two primary services that should be provided to components by the infrastructure are notification selection (i.e., determining which notifications match which subscriptions) and notification delivery (i.e., routing matching notifications from publishers to subscribers). Numerous event notification services have been developed for local-area networks, generally based on a centralized server to select and deliver event notifications. Therefore, they suffer from an inherent inability to scale to wide-area networks, such as the internet, where the number and physical distribution of the service's clients can quickly overwhelm a centralized solution. The critical challenge in the setting of a wide-area network is to maximize the expressiveness in the selection mechanism without sacrificing scalability in the delivery mechanism. This paper presents Siena, an event notification service that we have designed and implemented to exhibit both expressiveness and scalability. We describe the service's interface to applications, the algorithms used by networks of servers to select and deliver event notifications, and the strategies used to optimize performance.
We also present results of simulation studies that examine the scalability and performance of the service.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Experimentation; Performance",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords = "content-based addressing and routing; event notification; publish/subscribe",
  subject = "Computer Systems Organization --- Computer-Communication Networks --- Network Architecture and Design (C.2.1): {\bf Distributed networks}; Computer Systems Organization --- Computer-Communication Networks --- Network Protocols (C.2.2); Computer Systems Organization --- Computer-Communication Networks --- Distributed Systems (C.2.4): {\bf Distributed applications}; Computer Systems Organization --- Computer-Communication Networks --- Local and Wide-Area Networks (C.2.5): {\bf Internet}; Computer Systems Organization --- Computer-Communication Networks --- Internetworking (C.2.6): {\bf Routers}; Computer Systems Organization --- Performance of Systems (C.4): {\bf Design studies}; Computing Methodologies --- Simulation and Modeling --- Applications (I.6.3); Computing Methodologies --- Simulation and Modeling --- Model Validation and Analysis (I.6.4); Computing Methodologies --- Simulation and Modeling --- Types of Simulation (I.6.8): {\bf Discrete event}",
}

@Article{Maxemchuk:2001:IMS,
  author = "N. F. Maxemchuk and D. H. Shur",
  title = "An {Internet} multicast system for the stock market",
  journal = j-TOCS,
  volume = "19",
  number = "3",
  pages = "384--412",
  month = aug,
  year = "2001",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Wed Jul 18 10:18:45 MDT 2001",
  bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL = "http://www.acm.org/pubs/citations/journals/tocs/2001-19-3/p384-maxemchuk/",
  abstract = "We are moving toward an international, 24-hour, distributed, electronic stock exchange.
The exchange will use the global internet, or Internet, technology. This system is a natural application of multicast because there are a large number of receivers that should receive the same information simultaneously. The data requirements for the stock exchange are discussed. The current multicast protocols lack the reliability, fairness, and scalability needed in this application. We describe a distributed architecture and a timed reliable multicast protocol, TRMP, that has the appropriate characteristics. We consider three applications: (1) A unified stock ticker of the transactions that are being conducted on the various physical and electronic exchanges. Our objective is to deliver the same combined ticker reliably and simultaneously to all receivers, anywhere in the world. (2) A unified sequence of buy and sell offers that are delivered to a single exchange or a collection of exchanges. Our objective is to give all traders the same fair access to an exchange independent of their relative distances to the exchange or the delay and loss characteristics of the international network. (3) A distributed, electronic trading floor that can replace the current exchanges. This application has the fairness attributes of the first two applications and uses TRMP to conduct irrefutable, distributed trades.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Performance; Theory",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords = "multicast",
  subject = "Computer Systems Organization --- Computer-Communication Networks --- Network Architecture and Design (C.2.1); Computer Systems Organization --- Computer-Communication Networks --- Network Protocols (C.2.2); Computer Systems Organization --- Computer-Communication Networks --- Distributed Systems (C.2.4)",
}

@Article{Collins:2001:RIC,
  author = "Jamison D. Collins and Dean M.
Tullsen",
  title = "Runtime identification of cache conflict misses: {The} adaptive miss buffer",
  journal = j-TOCS,
  volume = "19",
  number = "4",
  pages = "413--439",
  month = nov,
  year = "2001",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Tue Feb 19 15:24:55 MST 2002",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Waldvogel:2001:SHS,
  author = "Marcel Waldvogel and George Varghese and Jon Turner and Bernhard Plattner",
  title = "Scalable high-speed prefix matching",
  journal = j-TOCS,
  volume = "19",
  number = "4",
  pages = "440--482",
  month = nov,
  year = "2001",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Tue Feb 19 15:24:55 MST 2002",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Alvarez:2001:MAR,
  author = "Guillermo A. Alvarez and Elizabeth Borowsky and Susie Go and Theodore H. Romer and Ralph Becker-Szendy and Richard Golding and Arif Merchant and Mirjana Spasojevic and Alistair Veitch and John Wilkes",
  title = "{Minerva}: An automated resource provisioning tool for large-scale storage systems",
  journal = j-TOCS,
  volume = "19",
  number = "4",
  pages = "483--518",
  month = nov,
  year = "2001",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Tue Feb 19 15:24:55 MST 2002",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Fu:2002:FSD,
  author = "Kevin Fu and M.
Frans Kaashoek and David Mazi{\`e}res",
  title = "Fast and secure distributed read-only file system",
  journal = j-TOCS,
  volume = "20",
  number = "1",
  pages = "1--24",
  month = feb,
  year = "2002",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:23 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Anderson:2002:IRR,
  author = "Darrell C. Anderson and Jeffrey S. Chase and Amin M. Vahdat",
  title = "Interposed request routing for scalable network storage",
  journal = j-TOCS,
  volume = "20",
  number = "1",
  pages = "25--48",
  month = feb,
  year = "2002",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:23 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Ganger:2002:FFA,
  author = "Gregory R. Ganger and Dawson R. Engler and M. Frans Kaashoek and H{\'e}ctor M. Brice{\~n}o and Russell Hunt and Thomas Pinckney",
  title = "Fast and flexible application-level networking on exokernel systems",
  journal = j-TOCS,
  volume = "20",
  number = "1",
  pages = "49--83",
  month = feb,
  year = "2002",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:23 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Satyanarayanan:2002:EC,
  author = "M.
Satyanarayanan",
  title = "The evolution of {Coda}",
  journal = j-TOCS,
  volume = "20",
  number = "2",
  pages = "85--124",
  month = may,
  year = "2002",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:24 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Burgess:2002:MSN,
  author = "Mark Burgess and H{\aa}rek Haugerud and Sigmund Straumsnes and Trond Reitan",
  title = "Measuring system normality",
  journal = j-TOCS,
  volume = "20",
  number = "2",
  pages = "125--160",
  month = may,
  year = "2002",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:24 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Hu:2002:LCD,
  author = "Zhigang Hu and Stefanos Kaxiras and Margaret Martonosi",
  title = "Let caches decay: reducing leakage energy via exploitation of cache generational behavior",
  journal = j-TOCS,
  volume = "20",
  number = "2",
  pages = "161--190",
  month = may,
  year = "2002",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:24 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Keidar:2002:MGM,
  author = "Idit Keidar and Jeremy Sussman and Keith Marzullo and Danny Dolev",
  title = "{Moshe}: a group membership service for {WANs}",
  journal = j-TOCS,
  volume = "20",
  number = "3",
  pages = "191--238",
  month = aug,
  year = "2002",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333
(electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:17:48 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Yu:2002:DEC,
  author = "Haifeng Yu and Amin Vahdat",
  title = "Design and evaluation of a conit-based continuous consistency model for replicated services",
  journal = j-TOCS,
  volume = "20",
  number = "3",
  pages = "239--282",
  month = aug,
  year = "2002",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:17:48 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords = "conit (consistency unit)",
}

@Article{Zdancewic:2002:SPP,
  author = "Steve Zdancewic and Lantian Zheng and Nathaniel Nystrom and Andrew C. Myers",
  title = "Secure program partitioning",
  journal = j-TOCS,
  volume = "20",
  number = "3",
  pages = "283--328",
  month = aug,
  year = "2002",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:17:48 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Zhou:2002:CSD,
  author = "Lidong Zhou and Fred B.
Schneider and Robbert {Van Renesse}",
  title = "{COCA}: a secure distributed online certification authority",
  journal = j-TOCS,
  volume = "20",
  number = "4",
  pages = "329--368",
  month = nov,
  year = "2002",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:24 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Jimenez:2002:NMD,
  author = "Daniel A. Jim{\'e}nez and Calvin Lin",
  title = "Neural methods for dynamic branch prediction",
  journal = j-TOCS,
  volume = "20",
  number = "4",
  pages = "369--397",
  month = nov,
  year = "2002",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:24 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Castro:2002:PBF,
  author = "Miguel Castro and Barbara Liskov",
  title = "Practical {Byzantine} fault tolerance and proactive recovery",
  journal = j-TOCS,
  volume = "20",
  number = "4",
  pages = "398--461",
  month = nov,
  year = "2002",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:24 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Hu:2003:RTS,
  author = "Y.
Charlie Hu and Weimin Yu and Alan Cox and Dan Wallach and Willy Zwaenepoel",
  title = "Run-time support for distributed sharing in safe languages",
  journal = j-TOCS,
  volume = "21",
  number = "1",
  pages = "1--35",
  month = feb,
  year = "2003",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:21:30 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Arpaci-Dusseau:2003:RTA,
  author = "Remzi H. Arpaci-Dusseau",
  title = "Run-time adaptation in {River}",
  journal = j-TOCS,
  volume = "21",
  number = "1",
  pages = "36--86",
  month = feb,
  year = "2003",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:21:30 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Nieh:2003:MTC,
  author = "Jason Nieh and S. Jae Yang and Naomi Novik",
  title = "Measuring thin-client performance using slow-motion benchmarking",
  journal = j-TOCS,
  volume = "21",
  number = "1",
  pages = "87--115",
  month = feb,
  year = "2003",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:21:30 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Nieh:2003:SSM,
  author = "Jason Nieh and Monica S.
Lam",
  title = "A {SMART} scheduler for multimedia applications",
  journal = j-TOCS,
  volume = "21",
  number = "2",
  pages = "117--163",
  month = may,
  year = "2003",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:25 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{VanRenesse:2003:ARS,
  author = "Robbert {Van Renesse} and Kenneth P. Birman and Werner Vogels",
  title = "{Astrolabe}: a robust and scalable technology for distributed system monitoring, management, and data mining",
  journal = j-TOCS,
  volume = "21",
  number = "2",
  pages = "164--206",
  month = may,
  year = "2003",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:25 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Harchol-Balter:2003:SBS,
  author = "Mor Harchol-Balter and Bianca Schroeder and Nikhil Bansal and Mukesh Agrawal",
  title = "Size-based scheduling to improve {Web} performance",
  journal = j-TOCS,
  volume = "21",
  number = "2",
  pages = "207--233",
  month = may,
  year = "2003",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:25 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Ellis:2003:E,
  author = "Carla Schlatter Ellis",
  title = "Editorial",
  journal = j-TOCS,
  volume = "21",
  number = "3",
  pages = "235--235",
  month = aug,
  year = "2003",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333
(electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:26 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Castro:2003:BUA,
  author = "Miguel Castro and Rodrigo Rodrigues and Barbara Liskov",
  title = "{BASE}: {Using} abstraction to improve fault tolerance",
  journal = j-TOCS,
  volume = "21",
  number = "3",
  pages = "236--269",
  month = aug,
  year = "2003",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:26 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Estan:2003:NDT,
  author = "Cristian Estan and George Varghese",
  title = "New directions in traffic measurement and accounting: {Focusing} on the elephants, ignoring the mice",
  journal = j-TOCS,
  volume = "21",
  number = "3",
  pages = "270--313",
  month = aug,
  year = "2003",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:26 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Swanson:2003:ESI,
  author = "Steven Swanson and Luke K. McDowell and Michael M. Swift and Susan J. Eggers and Henry M.
Levy",
  title = "An evaluation of speculative instruction execution on simultaneous multithreaded processors",
  journal = j-TOCS,
  volume = "21",
  number = "3",
  pages = "314--340",
  month = aug,
  year = "2003",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Aug 7 10:13:26 MDT 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Eugster:2003:LPB,
  author = "P. Th. Eugster and R. Guerraoui and S. B. Handurukande and P. Kouznetsov and A.-M. Kermarrec",
  title = "Lightweight probabilistic broadcast",
  journal = j-TOCS,
  volume = "21",
  number = "4",
  pages = "341--374",
  month = nov,
  year = "2003",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Fri Oct 31 06:17:27 MST 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Hadzic:2003:BPF,
  author = "Ilija Had{\v{z}}i{\'c} and Jonathan M. Smith",
  title = "Balancing performance and flexibility with hardware support for network architectures",
  journal = j-TOCS,
  volume = "21",
  number = "4",
  pages = "375--411",
  month = nov,
  year = "2003",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Fri Oct 31 06:17:27 MST 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Annavaram:2003:CGP,
  author = "Murali Annavaram and Jignesh M. Patel and Edward S.
Davidson",
  title = "Call graph prefetching for database applications",
  journal = j-TOCS,
  volume = "21",
  number = "4",
  pages = "412--444",
  month = nov,
  year = "2003",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Fri Oct 31 06:17:27 MST 2003",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Reumann:2004:SDI,
  author = "John Reumann and Kang G. Shin",
  title = "Stateful distributed interposition",
  journal = j-TOCS,
  volume = "22",
  number = "1",
  pages = "1--48",
  month = feb,
  year = "2004",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Mon Feb 2 14:07:29 MST 2004",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Olshefski:2004:UCI,
  author = "David Olshefski and Jason Nieh and Dakshi Agrawal",
  title = "Using {Certes} to infer client response time at the {Web} server",
  journal = j-TOCS,
  volume = "22",
  number = "1",
  pages = "49--93",
  month = feb,
  year = "2004",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Mon Feb 2 14:07:29 MST 2004",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords = "Certes (CliEnt Response Time Estimated by the Server)",
}

@Article{Adve:2004:PPP,
  author = "Vikram S. Adve and Mary K.
Vernon",
  title = "Parallel program performance prediction using deterministic task graph analysis",
  journal = j-TOCS,
  volume = "22",
  number = "1",
  pages = "94--136",
  month = feb,
  year = "2004",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Mon Feb 2 14:07:29 MST 2004",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Flinn:2004:MBL,
  author = "Jason Flinn and M. Satyanarayanan",
  title = "Managing battery lifetime with energy-aware adaptation",
  journal = j-TOCS,
  volume = "22",
  number = "2",
  pages = "137--179",
  month = may,
  year = "2004",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Nov 4 08:16:45 MST 2004",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Ashok:2004:CCE,
  author = "Raksit Ashok and Saurabh Chheda and Csaba Andras Moritz",
  title = "Coupling compiler-enabled and conventional memory accessing for energy efficiency",
  journal = j-TOCS,
  volume = "22",
  number = "2",
  pages = "180--213",
  month = may,
  year = "2004",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Nov 4 08:16:45 MST 2004",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Choi:2004:GFP,
  author = "Seungryul Choi and Nicholas Kohout and Sumit Pamnani and Dongkeun Kim and Donald Yeung",
  title = "A general framework for prefetch scheduling in linked data structures and its application to multi-chain prefetching",
  journal = j-TOCS,
  volume = "22",
number = "2",
  pages = "214--280",
  month = may,
  year = "2004",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Nov 4 08:16:45 MST 2004",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Verstoep:2004:CCP,
  author = "Kees Verstoep and Raoul A. F. Bhoedjang and Tim R{\"u}hl and Henri E. Bal and Rutger F. H. Hofman",
  title = "Cluster communication protocols for parallel-programming systems",
  journal = j-TOCS,
  volume = "22",
  number = "3",
  pages = "281--325",
  month = aug,
  year = "2004",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Nov 4 08:16:45 MST 2004",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Kim:2004:SSL,
  author = "Dongkeun Kim and Donald Yeung",
  title = "A study of source-level compiler algorithms for automatic construction of pre-execution code",
  journal = j-TOCS,
  volume = "22",
  number = "3",
  pages = "326--379",
  month = aug,
  year = "2004",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
  bibdate = "Thu Nov 4 08:16:45 MST 2004",
  bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Bartal:2004:FNF,
  author = "Yair Bartal and Alain Mayer and Kobbi Nissim and Avishai Wool",
  title = "{{\em Firmato\/}}: a novel firewall management toolkit",
  journal = j-TOCS,
  volume = "22",
  number = "4",
  pages = "381--420",
  month = nov,
  year = "2004",
  CODEN = "ACSYEC",
  ISSN = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L = "0734-2071",
bibdate = "Thu Dec 2 05:29:12 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Grimm:2004:SSP, author = "Robert Grimm and Janet Davis and Eric Lemar and Adam Macbeth and Steven Swanson and Thomas Anderson and Brian Bershad and Gaetano Borriello and Steven Gribble and David Wetherall", title = "System support for pervasive applications", journal = j-TOCS, volume = "22", number = "4", pages = "421--486", month = nov, year = "2004", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Dec 2 05:29:12 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Ellis:2005:E, author = "Carla Schlatter Ellis", title = "Editorial", journal = j-TOCS, volume = "23", number = "1", pages = "1--1", month = feb, year = "2005", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Apr 14 10:29:37 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Maniatis:2005:LPP, author = "Petros Maniatis and Mema Roussopoulos and T. J. Giuli and David S. H. 
Rosenthal and Mary Baker", title = "The {LOCKSS} peer-to-peer digital preservation system", journal = j-TOCS, volume = "23", number = "1", pages = "2--50", month = feb, year = "2005", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Apr 14 10:29:37 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{King:2005:BI, author = "Samuel T. King and Peter M. Chen", title = "Backtracking intrusions", journal = j-TOCS, volume = "23", number = "1", pages = "51--76", month = feb, year = "2005", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Apr 14 10:29:37 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Swift:2005:IRC, author = "Michael M. Swift and Brian N. Bershad and Henry M. 
Levy", title = "Improving the reliability of commodity operating systems", journal = j-TOCS, volume = "23", number = "1", pages = "77--110", month = feb, year = "2005", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Apr 14 10:29:37 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Gluhovsky:2005:CMC, author = "Ilya Gluhovsky and Brian O'Krafka", title = "Comprehensive multiprocessor cache miss rate generation using multivariate models", journal = j-TOCS, volume = "23", number = "2", pages = "111--145", month = may, year = "2005", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon May 9 11:20:41 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Herlihy:2005:NMM, author = "Maurice Herlihy and Victor Luchangco and Paul Martin and Mark Moir", title = "Nonblocking memory management support for dynamic-sized data structures", journal = j-TOCS, volume = "23", number = "2", pages = "146--196", month = may, year = "2005", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon May 9 11:20:41 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Jimenez:2005:ILA, author = "Daniel A. 
Jim{\'e}nez", title = "Improved latency and accuracy for neural branch prediction", journal = j-TOCS, volume = "23", number = "2", pages = "197--218", month = may, year = "2005", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon May 9 11:20:41 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Jelasity:2005:GBA, author = "M{\'a}rk Jelasity and Alberto Montresor and Ozalp Babaoglu", title = "{Gossip}-based aggregation in large dynamic networks", journal = j-TOCS, volume = "23", number = "3", pages = "219--252", month = aug, year = "2005", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Nov 18 08:19:50 MST 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Steffan:2005:SAT, author = "J. Gregory Steffan and Christopher Colohan and Antonia Zhai and Todd C. Mowry", title = "The {STAMPede} approach to thread-level speculation", journal = j-TOCS, volume = "23", number = "3", pages = "253--300", month = aug, year = "2005", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Nov 18 08:19:50 MST 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Kontothanassis:2005:SMC, author = "Leonidas Kontothanassis and Robert Stets and Galen Hunt and Umit Rencuzogullari and Gautam Altekar and Sandhya Dwarkadas and Michael L. 
Scott", title = "Shared memory computing on clusters with symmetric multiprocessors and system area networks", journal = j-TOCS, volume = "23", number = "3", pages = "301--335", month = aug, year = "2005", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Nov 18 08:19:50 MST 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Anderson:2005:QFN, author = "Eric Anderson and Susan Spence and Ram Swaminathan and Mahesh Kallahalla and Qian Wang", title = "Quickly finding near-optimal storage designs", journal = j-TOCS, volume = "23", number = "4", pages = "337--374", month = nov, year = "2005", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Feb 4 09:45:56 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Patino-Martinez:2005:MRC, author = "Marta Pati{\~n}o-Martinez and Ricardo Jim{\'e}nez-Peris and Bettina Kemme and Gustavo Alonso", title = "{MIDDLE-R}: {Consistent} database replication at the middleware level", journal = j-TOCS, volume = "23", number = "4", pages = "375--423", month = nov, year = "2005", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Feb 4 09:45:56 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Hsu:2005:AIL, author = "Windsor W. Hsu and Alan Jay Smith and Honesty C. 
Young", title = "The automatic improvement of locality in storage systems", journal = j-TOCS, volume = "23", number = "4", pages = "424--473", month = nov, year = "2005", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Feb 4 09:45:56 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Keromytis:2006:COS, author = "Angelos D. Keromytis and Jason L. Wright and Theo {De Raadt} and Matthew Burnside", title = "Cryptography as an operating system service: a case study", journal = j-TOCS, volume = "24", number = "1", pages = "1--38", month = feb, year = "2006", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1124153.1124154", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Apr 7 08:15:08 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Cryptographic transformations are a fundamental building block in many security applications and protocols. To improve performance, several vendors market hardware accelerator cards. However, until now no operating system provided a mechanism that allowed both uniform and efficient use of this new type of resource. We present the OpenBSD Cryptographic Framework (OCF), a service virtualization layer implemented inside the operating system kernel, that provides uniform access to accelerator functionality by hiding card-specific details behind a carefully designed API. We evaluate the impact of the OCF in a variety of benchmarks, measuring overall system performance, application throughput and latency, and aggregate throughput when multiple applications make use of it. 
We conclude that the OCF is extremely efficient in utilizing cryptographic accelerator functionality, attaining 95\% of the theoretical peak device performance and over 800 Mbps aggregate throughput using 3DES. We believe that this validates our decision to opt for ease of use by applications and kernel components through a uniform API and for seamless support for new accelerators. Furthermore, our evaluation points to several bottlenecks in system and operating system design: data copying between user and kernel modes, PCI bus signaling inefficiency, protocols that use small data units, and single-threaded applications. We identify some of these limitations through a set of measurements focusing on application-layer cryptographic protocols such as SSL. We offer several suggestions for improvements and directions for future work. We provide experimental evidence of the effectiveness of a new approach which we call operating system shortcutting. Shortcutting can improve the performance of application-layer cryptographic protocols by 27\% with very small changes to the kernel.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Coarfa:2006:PAT, author = "Cristian Coarfa and Peter Druschel and Dan S. 
Wallach", title = "Performance analysis of {TLS Web} servers", journal = j-TOCS, volume = "24", number = "1", pages = "39--69", month = feb, year = "2006", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1124153.1124155", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Apr 7 08:15:08 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Yu:2006:CLA, author = "Haifeng Yu and Amin Vahdat", title = "The costs and limits of availability for replicated services", journal = j-TOCS, volume = "24", number = "1", pages = "70--113", month = feb, year = "2006", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1124153.1124156", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Apr 7 08:15:08 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Moore:2006:IID, author = "David Moore and Colleen Shannon and Douglas J. Brown and Geoffrey M. 
Voelker and Stefan Savage", title = "Inferring {Internet} denial-of-service activity", journal = j-TOCS, volume = "24", number = "2", pages = "115--139", month = may, year = "2006", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1132026.1132027", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu May 18 08:01:47 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "In this article, we seek to address a simple question: ``How prevalent are denial-of-service attacks in the Internet?'' Our motivation is to quantitatively understand the nature of the current threat as well as to enable longer-term analyses of trends and recurring patterns of attacks. We present a new technique, called ``backscatter analysis,'' that provides a conservative estimate of worldwide denial-of-service activity. We use this approach on 22 traces (each covering a week or more) gathered over three years from 2001 through 2004. Across this corpus we quantitatively assess the number, duration, and focus of attacks, and qualitatively characterize their behavior. In total, we observed over 68,000 attacks directed at over 34,000 distinct victim IP addresses---ranging from well-known e-commerce companies such as Amazon and Hotmail to small foreign ISPs and dial-up connections. We believe our technique is the first to provide quantitative estimates of Internet-wide denial-of-service activity and that this article describes the most comprehensive public measurements of such activity to date.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Holman:2006:LUP, author = "Philip Holman and James H. 
Anderson", title = "Locking under {Pfair} scheduling", journal = j-TOCS, volume = "24", number = "2", pages = "140--174", month = may, year = "2006", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1132026.1132028", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu May 18 08:01:47 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "We present several locking synchronization protocols for Pfair-scheduled multiprocessor systems. We focus on two classes of protocols. The first class is only applicable in systems in which all critical sections are short relative to the length of the scheduling quantum. In this case, efficient synchronization can be achieved by ensuring that all locks have been released before tasks are preempted. This is accomplished by exploiting the quantum-based nature of Pfair scheduling, which provides a priori knowledge of all possible preemption points. The second and more general protocol class is applicable to any system. For this class, we consider the use of a client-server model. We also discuss the viability of inheritance-based protocols in Pfair-scheduled systems.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Lai:2006:PWA, author = "Albert M. 
Lai and Jason Nieh", title = "On the performance of wide-area thin-client computing", journal = j-TOCS, volume = "24", number = "2", pages = "175--209", month = may, year = "2006", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1132026.1132029", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu May 18 08:01:47 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "While many application service providers have proposed using thin-client computing to deliver computational services over the Internet, little work has been done to evaluate the effectiveness of thin-client computing in a wide-area network. To assess the potential of thin-client computing in the context of future commodity high-bandwidth Internet access, we have used a novel, noninvasive slow-motion benchmarking technique to evaluate the performance of several popular thin-client computing platforms in delivering computational services cross-country over Internet2. Our results show that using thin-client computing in a wide-area network environment can deliver acceptable performance over Internet2, even when client and server are located thousands of miles apart on opposite ends of the country. However, performance varies widely among thin-client platforms and not all platforms are suitable for this environment. While many thin-client systems are touted as being bandwidth efficient, we show that network latency is often the key factor in limiting wide-area thin-client performance. Furthermore, we show that the same techniques used to improve bandwidth efficiency often result in worse overall performance in wide-area networks. 
We characterize and analyze the different design choices in the various thin-client platforms and explain which of these choices should be selected for supporting wide-area computing services.", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Vachharajani:2006:LSE, author = "Manish Vachharajani and Neil Vachharajani and David A. Penry and Jason A. Blome and Sharad Malik and David I. August", title = "The {Liberty Simulation Environment}: a deliberate approach to high-level system modeling", journal = j-TOCS, volume = "24", number = "3", pages = "211--249", month = aug, year = "2006", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Aug 29 05:29:09 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Barr:2006:EAL, author = "Kenneth C. 
Barr and Krste Asanovi{\'c}",
  title =        "Energy-aware lossless data compression",
  journal =      j-TOCS,
  volume =       "24",
  number =       "3",
  pages =        "250--291",
  month =        aug,
  year =         "2006",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1151690.1151692",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Tue Aug 29 05:29:09 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Wireless transmission of a single bit can require
                 over 1000 times more energy than a single
                 computation. It can therefore be beneficial to
                 perform additional computation to reduce the number
                 of bits transmitted. If the energy required to
                 compress data is less than the energy required to
                 send it, there is a net energy savings and an
                 increase in battery life for portable computers.
                 This article presents a study of the energy savings
                 possible by losslessly compressing data prior to
                 transmission. A variety of algorithms were measured
                 on a StrongARM SA-110 processor. This work
                 demonstrates that, with several typical compression
                 algorithms, there is actually a net energy increase
                 when compression is applied before transmission.
                 Reasons for this increase are explained and
                 suggestions are made to avoid it. One such
                 energy-aware suggestion is asymmetric compression,
                 the use of one compression algorithm on the transmit
                 side and a different algorithm for the receive path.
                 By choosing the lowest-energy compressor and
                 decompressor on the test platform, overall energy to
                 send and receive data can be reduced by 11\% compared
                 with a well-chosen symmetric pair, or up to 57\% over
                 the default symmetric {\tt zlib} scheme.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

%%% NOTE(review): the Yuan and Nahrstedt entry below has no DOI or
%%% abstract recorded; confirm both against the ACM Digital Library
%%% before adding them.

@Article{Yuan:2006:EEC,
  author =       "Wanghong Yuan and Klara Nahrstedt",
  title =        "Energy-efficient {CPU} scheduling for multimedia
                 applications",
  journal =      j-TOCS,
  volume =       "24",
  number =       "3",
  pages =        "292--331",
  month =        aug,
  year =         "2006",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Tue Aug 29 05:29:09 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Swift:2006:RDD,
  author =       "Michael M. Swift and Muthukaruppan Annamalai and
                 Brian N. Bershad and Henry M. Levy",
  title =        "Recovering device drivers",
  journal =      j-TOCS,
  volume =       "24",
  number =       "4",
  pages =        "333--360",
  month =        nov,
  year =         "2006",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Nov 29 16:06:54 MST 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Nightingale:2006:SED,
  author =       "Edmund B. Nightingale and Peter M.
Chen and Jason Flinn", title = "Speculative execution in a distributed file system", journal = j-TOCS, volume = "24", number = "4", pages = "361--392", month = nov, year = "2006", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Nov 29 16:06:54 MST 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Yang:2006:UMC, author = "Junfeng Yang and Paul Twohey and Dawson Engler and Madanlal Musuvathi", title = "Using model checking to find serious file system errors", journal = j-TOCS, volume = "24", number = "4", pages = "393--423", month = nov, year = "2006", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Nov 29 16:06:54 MST 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Higham:2007:SMC, author = "Lisa Higham and Lillanne Jackson and Jalal Kawash", title = "Specifying memory consistency of write buffer multiprocessors", journal = j-TOCS, volume = "25", number = "1", pages = "1:1--1:??", month = feb, year = "2007", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Nov 29 16:06:55 MST 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, articleno = "1", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Gluhovsky:2007:CME, author = "Ilya Gluhovsky and David Vengerov and Brian O'Krafka", title = "Comprehensive multivariate extrapolation modeling of multiprocessor cache miss rates", journal = j-TOCS, volume = "25", number = "1", pages = "2:1--2:??", month = 
feb, year = "2007", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Nov 29 16:06:55 MST 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, articleno = "2", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Behar:2007:TCS, author = "Michael Behar and Avi Mendelson and Avinoam Kolodny", title = "Trace cache sampling filter", journal = j-TOCS, volume = "25", number = "1", pages = "3:1--3:??", month = feb, year = "2007", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Nov 29 16:06:55 MST 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, articleno = "3", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Swanson:2007:WA, author = "Steven Swanson and Andrew Schwerin and Martha Mercaldi and Andrew Petersen and Andrew Putnam and Ken Michelson and Mark Oskin and Susan J. 
Eggers", title = "The {WaveScalar} architecture", journal = j-TOCS, volume = "25", number = "2", pages = "4:1--4:??", month = may, year = "2007", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Nov 29 16:06:56 MST 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, articleno = "4", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Fraser:2007:CPL, author = "Keir Fraser and Tim Harris", title = "Concurrent programming without locks", journal = j-TOCS, volume = "25", number = "2", pages = "5:1--5:??", month = may, year = "2007", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Nov 29 16:06:56 MST 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, articleno = "5", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Appavoo:2007:EDO, author = "Jonathan Appavoo and Dilma {Da Silva} and Orran Krieger and Marc Auslander and Michal Ostrowski and Bryan Rosenburg and Amos Waterland and Robert W. 
Wisniewski and Jimi Xenidis and Michael Stumm and Livio Soares", title = "Experience distributing objects in an {SMMP OS}", journal = j-TOCS, volume = "25", number = "3", pages = "6:1--6:??", month = aug, year = "2007", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Nov 29 16:06:57 MST 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, articleno = "6", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Qin:2007:RTB, author = "Feng Qin and Joseph Tucek and Yuanyuan Zhou and Jagadeesan Sundaresan", title = "Rx: {Treating} bugs as allergies---a safe method to survive software failures", journal = j-TOCS, volume = "25", number = "3", pages = "7:1--7:??", month = aug, year = "2007", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Nov 29 16:06:57 MST 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, articleno = "7", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Jelasity:2007:GBP, author = "M{\'a}rk Jelasity and Spyros Voulgaris and Rachid Guerraoui and Anne-Marie Kermarrec and Maarten van Steen", title = "Gossip-based peer sampling", journal = j-TOCS, volume = "25", number = "3", pages = "8:1--8:??", month = aug, year = "2007", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Nov 29 16:06:57 MST 2007", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, articleno = "8", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Xu:2007:MEE, author = "Ruibin Xu and Daniel Moss{\'e} and Rami Melhem", title = "Minimizing expected energy consumption in real-time 
systems through dynamic voltage scaling", journal = j-TOCS, volume = "25", number = "4", pages = "9:1--9:??", month = dec, year = "2007", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1314299.1314300", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jun 16 17:52:15 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Many real-time systems, such as battery-operated embedded devices, are energy constrained. A common problem for these systems is how to reduce energy consumption in the system as much as possible while still meeting the deadlines; a commonly used power management mechanism by these systems is dynamic voltage scaling (DVS). Usually, the workloads executed by these systems are variable and, more often than not, unpredictable. Because of the unpredictability of the workloads, one cannot guarantee to minimize the energy consumption in the system. However, if the variability of the workloads can be captured by the probability distribution of the computational requirement of each task in the system, it is possible to achieve the goal of minimizing the expected energy consumption in the system. In this paper, we investigate DVS schemes that aim at minimizing expected energy consumption for frame-based hard real-time systems. Our investigation considers various DVS strategies (i.e., intra-task DVS, inter-task DVS, and hybrid DVS) and both an ideal system model (i.e., assuming unrestricted continuous frequency, well-defined power-frequency relation, and no speed change overhead) and a realistic system model (i.e., the processor provides a set of discrete speeds, no assumption is made on power-frequency relation, and speed change overhead is considered). The highlights of the investigation are two practical DVS schemes: Practical PACE (PPACE) for a single task and Practical Inter-Task DVS (PITDVS2) for general frame-based systems. 
Evaluation results show that our proposed schemes outperform and
                 achieve significant energy savings over existing
                 schemes.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "dynamic voltage scaling; power management; processor
                 acceleration to conserve energy; real-time",
}

@Article{Hur:2007:MSM,
  author =       "Ibrahim Hur and Calvin Lin",
  title =        "Memory scheduling for modern microprocessors",
  journal =      j-TOCS,
  volume =       "25",
  number =       "4",
  pages =        "10:1--10:??",
  month =        dec,
  year =         "2007",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1314299.1314301",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Mon Jun 16 17:52:15 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "The need to carefully schedule memory operations has
                 increased as memory performance has become
                 increasingly important to overall system performance.
                 This article describes the adaptive history-based
                 (AHB) scheduler, which uses the history of recently
                 scheduled operations to provide three conceptual
                 benefits: (1) it allows the scheduler to better
                 reason about the delays associated with its
                 scheduling decisions, (2) it provides a mechanism for
                 combining multiple constraints, which is important
                 for increasingly complex DRAM structures, and (3) it
                 allows the scheduler to select operations so that
                 they match the program's mixture of Reads and Writes,
                 thereby avoiding certain bottlenecks within the
                 memory controller.\par We have previously evaluated
                 this scheduler in the context of the IBM Power5. When
                 compared with the state of the art, this scheduler
                 improves performance by 15.6\%, 9.9\%, and 7.6\% for
                 the Stream, NAS, and commercial benchmarks,
                 respectively. This article expands our understanding
                 of the AHB scheduler in a variety of ways.
Looking backwards, we describe the scheduler in the context of prior work that focused exclusively on avoiding bank conflicts, and we show that the AHB scheduler is superior for the IBM Power5, which we argue will be representative of future microprocessor memory controllers. Looking forwards, we evaluate this scheduler in the context of future systems by varying a number of microarchitectural features and hardware parameters. For example, we show that the benefit of this scheduler increases as we move to multithreaded environments.", acknowledgement = ack-nhfb, articleno = "10", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "adaptive history-based scheduling; memory scheduling; memory system performance", } @Article{Vandebogart:2007:LEP, author = "Steve Vandebogart and Petros Efstathopoulos and Eddie Kohler and Maxwell Krohn and Cliff Frey and David Ziegler and Frans Kaashoek and Robert Morris and David Mazi{\`e}res", title = "Labels and event processes in the {Asbestos} operating system", journal = j-TOCS, volume = "25", number = "4", pages = "11:1--11:??", month = dec, year = "2007", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1314299.1314302", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jun 16 17:52:15 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Asbestos, a new operating system, provides novel labeling and isolation mechanisms that help contain the effects of exploitable software flaws. Applications can express a wide range of policies with Asbestos's kernel-enforced labels, including controls on interprocess communication and system-wide information flow. A new event process abstraction defines lightweight, isolated contexts within a single process, allowing one process to act on behalf of multiple users while preventing it from leaking any single user's data to others. 
A Web server demonstration application uses these primitives to isolate private user data. Since the untrusted workers that respond to client requests are constrained by labels, exploited workers cannot directly expose user data except as allowed by application policy. The server application requires 1.4 memory pages per user for up to 145,000 users and achieves connection rates similar to Apache, demonstrating that additional security can come at an acceptable cost.", acknowledgement = ack-nhfb, articleno = "11", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "information flow; labels; mandatory access control; process abstractions; secure Web servers", } @Article{Coulson:2008:GCM, author = "Geoff Coulson and Gordon Blair and Paul Grace and Fran{\c{c}}ois Taiani and Ackbar Joolia and Kevin Lee and Jo Ueyama and Thirunavukkarasu Sivaharan", title = "A generic component model for building systems software", journal = j-TOCS, volume = "26", number = "1", pages = "1:1--1:??", month = feb, year = "2008", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1328671.1328672", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jun 16 17:52:22 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Component-based software structuring principles are now commonplace at the application level; but componentization is far less established when it comes to building low-level systems software. Although there have been pioneering efforts in applying componentization to systems-building, these efforts have tended to target specific application domains (e.g., embedded systems, operating systems, communications systems, programmable networking environments, or middleware platforms). They also tend to be targeted at specific deployment environments (e.g., standard personal computer (PC) environments, network processors, or microcontrollers). 
The disadvantage of this narrow targeting is that it fails to maximize the genericity and abstraction potential of the component approach. In this article, we argue for the benefits and feasibility of a generic yet tailorable approach to component-based systems-building that offers a uniform programming model that is applicable in a wide range of systems-oriented target domains and deployment environments. The component model, called OpenCom, is supported by a reflective runtime architecture that is itself built from components. After describing OpenCom and evaluating its performance and overhead characteristics, we present and evaluate two case studies of systems we have built using OpenCom technology, thus illustrating its benefits and its general applicability.", acknowledgement = ack-nhfb, articleno = "1", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "component-based software; computer systems implementation", } @Article{Colohan:2008:IPD, author = "Christopher B. Colohan and Anastassia Ailamaki and J. Gregory Steffan and Todd C. Mowry", title = "Incrementally parallelizing database transactions with thread-level speculation", journal = j-TOCS, volume = "26", number = "1", pages = "2:1--2:??", month = feb, year = "2008", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1328671.1328673", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jun 16 17:52:22 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "With the advent of chip multiprocessors, exploiting intratransaction parallelism in database systems is an attractive way of improving transaction performance. 
However, exploiting intratransaction parallelism is difficult for two reasons: first, significant changes are required to avoid races or conflicts within the DBMS; and second, adding threads to transactions requires a high level of sophistication from transaction programmers. In this article we show how dividing a transaction into speculative threads solves both problems --- it minimizes the changes required to the DBMS, and the details of parallelization are hidden from the transaction programmer. Our technique requires a limited number of small, localized changes to a subset of the low-level data structures in the DBMS. Through this method of incrementally parallelizing transactions, we can dramatically improve performance: on a simulated four-processor chip-multiprocessor, we improve the response time by 44--66\% for three of the five TPC-C transactions, assuming the availability of idle processors.", acknowledgement = ack-nhfb, articleno = "2", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "chip-multiprocessing; incremental parallelization; optimistic concurrency; thread-level speculation", } @Article{Kostic:2008:HBD, author = "Dejan Kosti{\'c} and Alex C. Snoeren and Amin Vahdat and Ryan Braud and Charles Killian and James W. Anderson and Jeannie Albrecht and Adolfo Rodriguez and Erik Vandekieft", title = "High-bandwidth data dissemination for large-scale distributed systems", journal = j-TOCS, volume = "26", number = "1", pages = "3:1--3:??", month = feb, year = "2008", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1328671.1328674", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jun 16 17:52:22 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "This article focuses on the multireceiver data dissemination problem. Initially, IP multicast formed the basis for efficiently supporting such distribution. 
More recently, overlay networks have emerged to support point-to-multipoint communication. Both techniques focus on constructing trees rooted at the source to distribute content among all interested receivers. We argue, however, that trees have two fundamental limitations for data dissemination. First, since all data comes from a single parent, participants must often continuously probe in search of a parent with an acceptable level of bandwidth. Second, due to packet losses and failures, available bandwidth is monotonically decreasing down the tree.\par To address these limitations, we present Bullet, a data dissemination mesh that takes advantage of the computational and storage capabilities of end hosts to create a distribution structure where a node receives data in parallel from multiple peers. For the mesh to deliver improved bandwidth and reliability, we need to solve several key problems: (i) disseminating disjoint data over the mesh, (ii) locating missing content, (iii) finding who to peer with (peering strategy), (iv) retrieving data at the right rate from all peers (flow control), and (v) recovering from failures and adapting to dynamically changing network conditions. Additionally, the system should be self-adjusting and should have few user-adjustable parameter settings. We describe our approach to addressing all of these problems in a working, deployed system across the Internet. Bullet outperforms state-of-the-art systems, including BitTorrent, by 25--70\% and exhibits strong performance and reliability in a range of deployment settings. In addition, we find that, relative to tree-based solutions, Bullet reduces the need to perform expensive bandwidth probing.", acknowledgement = ack-nhfb, articleno = "3", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "bandwidth; overlays; peer-to-peer", } @Article{Chang:2008:BDS, author = "Fay Chang and Jeffrey Dean and Sanjay Ghemawat and Wilson C. Hsieh and Deborah A. 
Wallach and Mike Burrows and Tushar Chandra and Andrew Fikes and Robert E. Gruber", title = "{Bigtable}: a distributed storage system for structured data", journal = j-TOCS, volume = "26", number = "2", pages = "4:1--4:??", month = jun, year = "2008", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1365815.1365816", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jun 16 17:52:30 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Bigtable is a distributed storage system for managing structured data that is designed to scale to a very large size: petabytes of data across thousands of commodity servers. Many projects at Google store data in Bigtable, including web indexing, Google Earth, and Google Finance. These applications place very different demands on Bigtable, both in terms of data size (from URLs to web pages to satellite imagery) and latency requirements (from backend bulk processing to real-time data serving). Despite these varied demands, Bigtable has successfully provided a flexible, high-performance solution for all of these Google products. 
In this article, we describe the simple data model provided by Bigtable, which gives clients dynamic control over data layout and format, and we describe the design and implementation of Bigtable.", acknowledgement = ack-nhfb, articleno = "4", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "large-scale distributed storage", } @Article{Bar-Yossef:2008:RRW, author = "Ziv Bar-Yossef and Roy Friedman and Gabriel Kliot", title = "{RaWMS} --- {Random Walk Based Lightweight Membership Service} for Wireless Ad Hoc Networks", journal = j-TOCS, volume = "26", number = "2", pages = "5:1--5:??", month = jun, year = "2008", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1365815.1365817", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jun 16 17:52:30 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "This article presents RaWMS, a novel lightweight random membership service for ad hoc networks. The service provides each node with a partial uniformly chosen view of network nodes. Such a membership service is useful, for example, in data dissemination algorithms, lookup and discovery services, peer sampling services, and complete membership construction. The design of RaWMS is based on a novel reverse random walk (RW) sampling technique. The article includes a formal analysis of both the reverse RW sampling technique and RaWMS and verifies it through a detailed simulation study. In addition, RaWMS is compared both analytically and by simulations with a number of other known methods such as flooding and gossip-based techniques.", acknowledgement = ack-nhfb, articleno = "5", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "ad hoc networks; membership service; random walk", } @Article{Nightingale:2008:RS, author = "Edmund B. Nightingale and Kaushik Veeraraghavan and Peter M. 
Chen and Jason Flinn", title = "Rethink the sync", journal = j-TOCS, volume = "26", number = "3", pages = "6:1--6:26", month = sep, year = "2008", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1394441.1394442", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Sep 17 14:28:13 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "We introduce {\em external synchrony}, a new model for local file I/O that provides the reliability and simplicity of synchronous I/O, yet also closely approximates the performance of asynchronous I/O. An external observer cannot distinguish the output of a computer with an externally synchronous file system from the output of a computer with a synchronous file system. No application modification is required to use an externally synchronous file system. In fact, application developers can program to the simpler synchronous I/O abstraction and still receive excellent performance. We have implemented an externally synchronous file system for Linux, called xsyncfs. Xsyncfs provides the same durability and ordering-guarantees as those provided by a {\em synchronously\/} mounted ext3 file system. Yet even for I/O-intensive benchmarks, xsyncfs performance is within 7\% of ext3 mounted {\em asynchronously}. Compared to ext3 mounted synchronously, xsyncfs is up to two orders of magnitude faster.", acknowledgement = ack-nhfb, articleno = "6", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "causality; file systems; speculative execution; synchronous I/O", } @Article{Agrawal:2008:AWS, author = "Kunal Agrawal and Charles E. 
Leiserson and Yuxiong He and Wen Jing Hsu", title = "Adaptive work-stealing with parallelism feedback", journal = j-TOCS, volume = "26", number = "3", pages = "7:1--7:32", month = sep, year = "2008", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1394441.1394443", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Sep 17 14:28:13 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Multiprocessor scheduling in a shared multiprogramming environment can be structured as two-level scheduling, where a kernel-level job scheduler allots processors to jobs and a user-level thread scheduler schedules the work of a job on its allotted processors. We present a randomized work-stealing thread scheduler for fork-join multithreaded jobs that provides continual parallelism feedback to the job scheduler in the form of requests for processors. Our A-STEAL algorithm is appropriate for large parallel servers where many jobs share a common multiprocessor resource and in which the number of processors available to a particular job may vary during the job's execution. Assuming that the job scheduler never allots a job more processors than requested by the job's thread scheduler, A-STEAL guarantees that the job completes in near-optimal time while utilizing at least a constant fraction of the allotted processors.\par We model the job scheduler as the thread scheduler's adversary, challenging the thread scheduler to be robust to the operating environment as well as to the job scheduler's administrative policies. For example, the job scheduler might make a large number of processors available exactly when the job has little use for them. 
To analyze the performance of our adaptive thread scheduler under this stringent adversarial assumption, we introduce a new technique called {\em trim analysis,\/} which allows us to prove that our thread scheduler performs poorly on no more than a small number of time steps, exhibiting near-optimal behavior on the vast majority.\par More precisely, suppose that a job has work $T_1$ and span $T_\infty$. On a machine with $P$ processors, A-STEAL completes the job in an expected duration of $O(T_1 / \tilde{P} + T_\infty + L \lg P)$ time steps, where $L$ is the length of a scheduling quantum, and $\tilde{P}$ denotes the $O(T_\infty + L \lg P)$-trimmed availability. This quantity is the average of the processor availability over all time steps except the $O(T_\infty + L \lg P)$ time steps that have the highest processor availability. When the job's parallelism dominates the trimmed availability, that is, $\tilde{P} \ll T_1 / T_\infty$, the job achieves nearly perfect linear speedup. Conversely, when the trimmed mean dominates the parallelism, the asymptotic running time of the job is nearly the length of its span, which is optimal.\par We measured the performance of A-STEAL on a simulated multiprocessor system using synthetic workloads. For jobs with sufficient parallelism, our experiments confirm that A-STEAL provides almost perfect linear speedup across a variety of processor availability profiles. We compared A-STEAL with the ABP algorithm, an adaptive work-stealing thread scheduler developed by Arora et al. [1998] which does not employ parallelism feedback. 
On moderately to heavily loaded machines with large numbers of processors, A-STEAL typically completed jobs more than twice as quickly as ABP, despite being allotted the same number or fewer processors on every step, while wasting only 10\% of the processor cycles wasted by ABP.", acknowledgement = ack-nhfb, articleno = "7", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "adaptive scheduling; adversary; instantaneous parallelism; job scheduling; multiprocessing; multiprogramming; parallel computation; parallelism feedback; processor allocation; randomized algorithm; space sharing; span; thread scheduling; trim analysis; two-level scheduling; work; work-stealing", } @Article{Shieh:2008:SAC, author = "Alan Shieh and Andrew C. Myers and Emin G{\"u}n Sirer", title = "A stateless approach to connection-oriented protocols", journal = j-TOCS, volume = "26", number = "3", pages = "8:1--8:50", month = sep, year = "2008", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1394441.1394444", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Sep 17 14:28:13 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Traditional operating system interfaces and network protocol implementations force some system state to be kept on both sides of a connection. This state ties the connection to its endpoints, impedes transparent failover, permits denial-of-service attacks, and limits scalability. This article introduces a novel TCP-like transport protocol and a new interface to replace sockets that together enable all state to be kept on one endpoint, allowing the other endpoint, typically the server, to operate without any per-connection state. Called {\em Trickles}, this approach enables servers to scale well with increasing numbers of clients, consume fewer resources, and better resist denial-of-service attacks. 
Measurements on a full implementation in Linux indicate that Trickles achieves performance comparable to TCP/IP, interacts well with other flows, and scales well. Trickles also enables qualitatively different kinds of networked services. Services can be geographically replicated and contacted through an anycast primitive for improved availability and performance. Widely-deployed practices that currently have client-observable side effects, such as periodic server reboots, connection redirection, and failover, can be made transparent, and perform well, under Trickles. The protocol is secure against tampering and replay attacks, and the client interface is backward-compatible, requiring no changes to sockets-based client applications.", acknowledgement = ack-nhfb, articleno = "8", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "stateless interfaces; stateless protocols", } @Article{Costa:2008:VEE, author = "Manuel Costa and Jon Crowcroft and Miguel Castro and Antony Rowstron and Lidong Zhou and Lintao Zhang and Paul Barham", title = "{Vigilante}: End-to-end containment of {Internet} worm epidemics", journal = j-TOCS, volume = "26", number = "4", pages = "9:1--9:??", month = dec, year = "2008", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1455258.1455259", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Dec 23 13:36:21 MST 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Worm containment must be automatic because worms can spread too fast for humans to respond. Recent work proposed network-level techniques to automate worm containment; these techniques have limitations because there is no information about the vulnerabilities exploited by worms at the network level. 
We propose Vigilante, a new end-to-end architecture to contain worms automatically that addresses these limitations.\par In Vigilante, hosts detect worms by instrumenting vulnerable programs to analyze infection attempts. We introduce {\em dynamic data-flow analysis\/}: a broad-coverage host-based algorithm that can detect unknown worms by tracking the flow of data from network messages and disallowing unsafe uses of this data. We also show how to integrate other host-based detection mechanisms into the Vigilante architecture. Upon detection, hosts generate {\em self-certifying alerts\/} (SCAs), a new type of security alert that can be inexpensively verified by any vulnerable host. Using SCAs, hosts can cooperate to contain an outbreak, without having to trust each other. Vigilante broadcasts SCAs over an overlay network that propagates alerts rapidly and resiliently. Hosts receiving an SCA protect themselves by generating filters with {\em vulnerability condition slicing\/}: an algorithm that performs dynamic analysis of the vulnerable program to identify control-flow conditions that lead to successful attacks. These filters block the worm attack and all its polymorphic mutations that follow the execution path identified by the SCA.\par Our results show that Vigilante can contain fast-spreading worms that exploit unknown vulnerabilities, and that Vigilante's filters introduce a negligible performance overhead. Vigilante does not require any changes to hardware, compilers, operating systems, or the source code of vulnerable programs; therefore, it can be used to protect current software binaries.", acknowledgement = ack-nhfb, articleno = "9", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "dynamic data-flow analysis; program analysis; self-certifying alerts; vulnerability condition slicing; Worm containment", } @Article{Qiao:2008:IPP, author = "Yi Qiao and Fabi{\'a}n E. Bustamante and Peter A. 
Dinda and Stefan Birrer and Dong Lu", title = "Improving peer-to-peer performance through server-side scheduling", journal = j-TOCS, volume = "26", number = "4", pages = "10:1--10:??", month = dec, year = "2008", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1455258.1455260", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Dec 23 13:36:21 MST 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "We show how to significantly improve the mean response time seen by both uploaders and downloaders in peer-to-peer data-sharing systems. Our work is motivated by the observation that response times are largely determined by the performance of the peers serving the requested objects, that is, by the peers in their capacity as servers. With this in mind, we take a close look at this {\em server side\/} of peers, characterizing its workload by collecting and examining an extensive set of traces. Using trace-driven simulation, we demonstrate the promise and potential problems with scheduling policies based on shortest-remaining-processing-time (SRPT), the algorithm known to be optimal for minimizing mean response time. The key challenge to using SRPT in this context is determining request service times. In addressing this challenge, we introduce two new estimators that enable {\em predictive\/} SRPT scheduling policies that closely approach the performance of ideal SRPT. 
We evaluate our approach through extensive single-server and system-level simulation coupled with real Internet deployment and experimentation.", acknowledgement = ack-nhfb, articleno = "10", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "Peer-to-peer; scheduling; server-side; size-based scheduling; SRPT", } @Article{Choi:2009:HCS, author = "Seungryul Choi and Donald Yeung", title = "Hill-climbing {SMT} processor resource distribution", journal = j-TOCS, volume = "27", number = "1", pages = "1:1--1:??", month = feb, year = "2009", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Feb 13 18:30:25 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "The key to high performance in Simultaneous MultiThreaded (SMT) processors lies in optimizing the distribution of shared resources to active threads. Existing resource distribution techniques optimize performance only indirectly. They infer potential performance bottlenecks by observing indicators, like instruction occupancy or cache miss counts, and take actions to try to alleviate them. While the corrective actions are designed to improve performance, their actual performance impact is not known since end performance is never monitored. Consequently, potential performance gains are lost whenever the corrective actions do not effectively address the actual bottlenecks occurring in the pipeline.\par We propose a different approach to SMT resource distribution that optimizes end performance directly. Our approach observes the impact that resource distribution decisions have on performance at runtime, and feeds this information back to the resource distribution mechanisms to improve future decisions. By evaluating many different resource distributions, our approach tries to learn the best distribution over time. 
Because we perform learning online, learning time is crucial. We develop a hill-climbing algorithm that quickly learns the best distribution of resources by following the performance gradient within the resource distribution space. We also develop several ideal learning algorithms to enable deeper insights through limit studies.\par This article conducts an in-depth investigation of hill-climbing SMT resource distribution using a comprehensive suite of 63 multiprogrammed workloads. Our results show hill-climbing outperforms ICOUNT, FLUSH, and DCRA (three existing SMT techniques) by 11.4\%, 11.5\%, and 2.8\%, respectively, under the weighted IPC metric. A limit study conducted using our ideal learning algorithms shows our approach can potentially outperform the same techniques by 19.2\%, 18.0\%, and 7.6\%, respectively, thus demonstrating additional room exists for further improvement. Using our ideal algorithms, we also identify three bottlenecks that limit online learning speed: local maxima, phased behavior, and interepoch jitter. We define metrics to quantify these learning bottlenecks, and characterize the extent to which they occur in our workloads. Finally, we conduct a sensitivity study, and investigate several extensions to improve our hill-climbing technique.", acknowledgement = ack-nhfb, articleno = "1", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Ntarmos:2009:DHS, author = "N. Ntarmos and P. Triantafillou and G. 
Weikum", title = "Distributed hash sketches: {Scalable}, efficient, and accurate cardinality estimation for distributed multisets", journal = j-TOCS, volume = "27", number = "1", pages = "2:1--2:??", month = feb, year = "2009", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Feb 13 18:30:25 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Counting items in a distributed system, and estimating the cardinality of multisets in particular, is important for a large variety of applications and a fundamental building block for emerging Internet-scale information systems. Examples of such applications range from optimizing query access plans in peer-to-peer data sharing, to computing the significance (rank/score) of data items in distributed information retrieval. The general formal problem addressed in this article is computing the network-wide distinct number of items with some property (e.g., distinct files with file name containing ``spiderman'') where each node in the network holds an arbitrary subset, possibly overlapping the subsets of other nodes. The key requirements that a viable approach must satisfy are: (1) scalability towards very large network size, (2) efficiency regarding messaging overhead, (3) load balance of storage and access, (4) accuracy of the cardinality estimation, and (5) simplicity and easy integration in applications. This article contributes the DHS (Distributed Hash Sketches) method for this problem setting: a distributed, scalable, efficient, and accurate multiset cardinality estimator. DHS is based on hash sketches for probabilistic counting, but distributes the bits of each counter across network nodes in a judicious manner based on principles of Distributed Hash Tables, paying careful attention to fast access and aggregation as well as update costs. 
The article discusses various design choices, exhibiting tunable trade-offs between estimation accuracy, hop-count efficiency, and load distribution fairness. We further contribute a full-fledged, publicly available, open-source implementation of all our methods, and a comprehensive experimental evaluation for various settings.", acknowledgement = ack-nhfb, articleno = "2", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Eyerman:2009:MPM, author = "Stijn Eyerman and Lieven Eeckhout and Tejas Karkhanis and James E. Smith", title = "A mechanistic performance model for superscalar out-of-order processors", journal = j-TOCS, volume = "27", number = "2", pages = "3:1--3:??", month = may, year = "2009", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1534909.1534910", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed May 27 15:56:17 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "A mechanistic model for out-of-order superscalar processors is developed and then applied to the study of microarchitecture resource scaling. The model divides execution time into intervals separated by disruptive miss events such as branch mispredictions and cache misses. Each type of miss event results in characterizable performance behavior for the execution time interval. By considering an interval's type and length (measured in instructions), execution time can be predicted for the interval. Overall execution time is then determined by aggregating the execution time over all intervals. The mechanistic model provides several advantages over prior modeling approaches, and, when estimating performance, it differs from detailed simulation of a 4-wide out-of-order processor by an average of 7\%.\par The mechanistic model is applied to the general problem of resource scaling in out-of-order superscalar processors. 
First, we use the model to determine size relationships among microarchitecture structures in a balanced processor design. Second, we use the mechanistic model to study scaling of both pipeline depth and width in balanced processor designs. We corroborate previous results in this area and provide new results. For example, we show that at optimal design points, the pipeline depth times the square root of the processor width is nearly constant. Finally, we consider the behavior of unbalanced, overprovisioned processor designs based on insight gained from the mechanistic model. We show that in certain situations an overprovisioned processor may lead to improved overall performance. Designs where a processor's dispatch width is wider than its issue width are of particular interest.", acknowledgement = ack-nhfb, articleno = "3", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "analytical modeling; balanced processor design; mechanistic modeling; overprovisioned processor design; performance modeling; pipeline depth; pipeline width; resource scaling; Superscalar out-of-order processor; wide front-end dispatch processors", } @Article{Zagorodnov:2009:PLO, author = "Dmitrii Zagorodnov and Keith Marzullo and Lorenzo Alvisi and Thomas C. Bressoud", title = "Practical and low-overhead masking of failures of {TCP}-based servers", journal = j-TOCS, volume = "27", number = "2", pages = "4:1--4:??", month = may, year = "2009", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1534909.1534911", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed May 27 15:56:17 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "This article describes an architecture that allows a replicated service to survive crashes without breaking its TCP connections. 
Our approach does not require modifications to the TCP protocol, to the operating system on the server, or to any of the software running on the clients. Furthermore, it runs on commodity hardware. We compare two implementations of this architecture (one based on primary/backup replication and another based on message logging) focusing on scalability, failover time, and application transparency. We evaluate three types of services: a file server, a Web server, and a multimedia streaming server. Our experiments suggest that the approach incurs low overhead on throughput, scales well as the number of clients increases, and allows recovery of the service in near-optimal time.", acknowledgement = ack-nhfb, articleno = "4", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "Fault-tolerant computing system; primary/backup approach; TCP/IP", } @Article{Aguilera:2009:SNP, author = "Marcos K. Aguilera and Arif Merchant and Mehul Shah and Alistair Veitch and Christos Karamanolis", title = "{Sinfonia}: a new paradigm for building scalable distributed systems", journal = j-TOCS, volume = "27", number = "3", pages = "5:1--5:48", month = nov, year = "2009", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1629087.1629088", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Mar 15 09:06:12 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "We propose a new paradigm for building scalable distributed systems. Our approach does not require dealing with message-passing protocols, a major complication in existing distributed systems. Instead, developers just design and manipulate data structures within our service called Sinfonia. Sinfonia keeps data for applications on a set of memory nodes, each exporting a linear address space. 
At the core of Sinfonia is a new minitransaction primitive that enables efficient and consistent access to data, while hiding the complexities that arise from concurrency and failures. Using Sinfonia, we implemented two very different and complex applications in a few months: a cluster file system and a group communication service. Our implementations perform well and scale to hundreds of machines.", acknowledgement = ack-nhfb, articleno = "5", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "Distributed systems; fault tolerance; scalability; shared memory; transactions; two-phase commit", } @Article{Cherkasova:2009:AAD, author = "Ludmila Cherkasova and Kivanc Ozonat and Ningfang Mi and Julie Symons and Evgenia Smirni", title = "Automated anomaly detection and performance modeling of enterprise applications", journal = j-TOCS, volume = "27", number = "3", pages = "6:1--6:32", month = nov, year = "2009", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1629087.1629089", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Mar 15 09:06:12 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Automated tools for understanding application behavior and its changes during the application lifecycle are essential for many performance analysis and debugging tasks. Application performance issues have an immediate impact on customer experience and satisfaction. A sudden slowdown of enterprise-wide application can affect a large population of customers, lead to delayed projects, and ultimately can result in company financial loss. Significantly shortened time between new software releases further exacerbates the problem of thoroughly evaluating the performance of an updated application. Our thesis is that online performance modeling should be a part of routine application monitoring.
Early, informative warnings on significant changes in application performance should help service providers to timely identify and prevent performance problems and their negative impact on the service. We propose a novel framework for automated anomaly detection and application change analysis. It is based on integration of two complementary techniques: (i) a regression-based transaction model that reflects a resource consumption model of the application, and (ii) an application performance signature that provides a compact model of runtime behavior of the application. The proposed integrated framework provides a simple and powerful solution for anomaly detection and analysis of essential performance changes in application behavior. An additional benefit of the proposed approach is its simplicity: It is not intrusive and is based on monitoring data that is typically available in enterprise production environments. The introduced solution further enables the automation of capacity planning and resource provisioning tasks of multitier applications in rapidly evolving IT environments.", acknowledgement = ack-nhfb, articleno = "6", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "Anomaly detection; capacity planning; multitier applications; online algorithms; performance modeling", } @Article{Kotla:2009:ZSB, author = "Ramakrishna Kotla and Lorenzo Alvisi and Mike Dahlin and Allen Clement and Edmund Wong", title = "{Zyzzyva}: {Speculative Byzantine} fault tolerance", journal = j-TOCS, volume = "27", number = "4", pages = "7:1--7:39", month = dec, year = "2009", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1658357.1658358", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Mar 15 09:06:46 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "A longstanding vision in distributed systems is to build reliable systems from 
unreliable components. An enticing formulation of this vision is Byzantine Fault-Tolerant (BFT) state machine replication, in which a group of servers collectively act as a correct server even if some of the servers misbehave or malfunction in arbitrary (``Byzantine'') ways. Despite this promise, practitioners hesitate to deploy BFT systems, at least partly because of the perception that BFT must impose high overheads.\par In this article, we present Zyzzyva, a protocol that uses speculation to reduce the cost of BFT replication. In Zyzzyva, replicas reply to a client's request without first running an expensive three-phase commit protocol to agree on the order to process requests. Instead, they optimistically adopt the order proposed by a primary server, process the request, and reply immediately to the client. If the primary is faulty, replicas can become temporarily inconsistent with one another, but clients detect inconsistencies, help correct replicas converge on a single total ordering of requests, and only rely on responses that are consistent with this total order. 
This approach allows Zyzzyva to reduce replication overheads to near their theoretical minima and to achieve throughputs of tens of thousands of requests per second, making BFT replication practical for a broad range of demanding services.", acknowledgement = ack-nhfb, articleno = "7", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "Byzantine fault tolerance; output commit; replication; speculative execution", } @Article{Vera:2009:SRL, author = "Xavier Vera and Jaume Abella and Javier Carretero and Antonio Gonz{\'a}lez", title = "Selective replication: a lightweight technique for soft errors", journal = j-TOCS, volume = "27", number = "4", pages = "8:1--8:30", month = dec, year = "2009", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1658357.1658359", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Mar 15 09:06:46 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Soft errors are an important challenge in contemporary microprocessors. Modern processors have caches and large memory arrays protected by parity or error detection and correction codes. However, today's failure rate is dominated by flip flops, latches, and the increasing sensitivity of combinational logic to particle strikes. Moreover, as Chip Multi-Processors (CMPs) become ubiquitous, meeting the FIT budget for new designs is becoming a major challenge.\par Solutions based on replicating threads have been explored deeply; however, their high cost in performance and energy make them unsuitable for current designs. Moreover, our studies based on a typical configuration for a modern processor show that focusing on the top 5 most vulnerable structures can provide up to 70\% reduction in FIT rate. 
Therefore, full replication may overprotect the chip by reducing the FIT much below budget.\par We propose {\em Selective Replication}, a lightweight-reconfigurable mechanism that achieves a high FIT reduction by protecting the most vulnerable instructions with minimal performance and energy impact. Low performance degradation is achieved by not requiring additional issue slots and reissuing instructions only during the time window between when they are retirable and they actually retire. Coverage can be reconfigured online by replicating only a subset of the instructions (the most vulnerable ones). Instructions' vulnerability is estimated based on the area they occupy and the time they spend in the issue queue. By changing the vulnerability threshold, we can adjust the trade-off between coverage and performance loss.\par Results for an out-of-order processor configured similarly to Intel{\reg} Core\TM{} Micro-Architecture show that our scheme can achieve over 65\% FIT reduction with less than 4\% performance degradation with small area and complexity overhead.", acknowledgement = ack-nhfb, articleno = "8", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "AVF prediction; FIT reduction; redundant multithreading; Soft errors", } @Article{Chen:2010:E, author = "Peter M. 
Chen", title = "Editorial", journal = j-TOCS, volume = "28", number = "1", pages = "1:1--1:??", month = mar, year = "2010", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1731060.1731061", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Apr 5 12:44:43 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, articleno = "1", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Yabandeh:2010:PPI, author = "Maysam Yabandeh and Nikola Kne{\v{z}}evi{\'c} and Dejan Kosti{\'c} and Viktor Kuncak", title = "Predicting and preventing inconsistencies in deployed distributed systems", journal = j-TOCS, volume = "28", number = "1", pages = "2:1--2:??", month = mar, year = "2010", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1731060.1731062", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Apr 5 12:44:43 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "We propose a new approach for developing and deploying distributed systems, in which nodes predict distributed consequences of their actions and use this information to detect and avoid errors. Each node continuously runs a state exploration algorithm on a recent consistent snapshot of its neighborhood and predicts possible future violations of specified safety properties. We describe a new state exploration algorithm, consequence prediction, which explores causally related chains of events that lead to property violation.\par This article describes the design and implementation of this approach, termed CrystalBall. We evaluate CrystalBall on RandTree, BulletPrime, Paxos, and Chord distributed system implementations. We identified new bugs in mature Mace implementations of three systems. 
Furthermore, we show that if the bug is not corrected during system development, CrystalBall is effective in steering the execution away from inconsistent states at runtime.", acknowledgement = ack-nhfb, articleno = "2", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "consequence prediction; Distributed systems; enforcing safety properties; execution steering; reliability", } @Article{Walfish:2010:DDO, author = "Michael Walfish and Mythili Vutukuru and Hari Balakrishnan and David Karger and Scott Shenker", title = "{DDoS} defense by offense", journal = j-TOCS, volume = "28", number = "1", pages = "3:1--3:??", month = mar, year = "2010", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1731060.1731063", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Apr 5 12:44:43 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "This article presents the design, implementation, analysis, and experimental evaluation of {\em speak-up}, a defense against {\em application-level\/} distributed denial-of-service (DDoS), in which attackers cripple a server by sending legitimate-looking requests that consume computational resources (e.g., CPU cycles, disk). With speak-up, a victimized server encourages all clients, resources permitting, {\em to automatically send higher volumes of traffic}. We suppose that attackers are already using most of their upload bandwidth so cannot react to the encouragement. Good clients, however, have spare upload bandwidth so can react to the encouragement with drastically higher volumes of traffic. The intended outcome of this traffic inflation is that the good clients crowd out the bad ones, thereby capturing a much larger fraction of the server's resources than before. 
We experiment under various conditions and find that speak-up causes the server to spend resources on a group of clients in rough proportion to their aggregate upload bandwidths, which is the intended result.", acknowledgement = ack-nhfb, articleno = "3", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "bandwidth; currency; DoS attack", } @Article{Roeder:2010:PO, author = "Tom Roeder and Fred B. Schneider", title = "Proactive obfuscation", journal = j-TOCS, volume = "28", number = "2", pages = "4:1--4:??", month = jul, year = "2010", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1813654.1813655", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jul 22 12:42:28 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "{\em Proactive obfuscation\/} is a new method for creating server replicas that are likely to have fewer shared vulnerabilities. It uses semantics-preserving code transformations to generate diverse executables, periodically restarting servers with these fresh versions. The periodic restarts help bound the number of compromised replicas that a service ever concurrently runs, and therefore proactive obfuscation makes an adversary's job harder. Proactive obfuscation was used in implementing two prototypes: a distributed firewall based on state-machine replication and a distributed storage service based on quorum systems. Costs intrinsic to supporting proactive obfuscation in replicated systems were evaluated by measuring the performance of these prototypes. 
The results show that employing proactive obfuscation adds little to the cost of replica-management protocols.", acknowledgement = ack-nhfb, articleno = "4", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "Byzantine fault tolerance; distributed systems; proactive recovery; quorum systems; state machine approach", } @Article{Guerraoui:2010:TOT, author = "Rachid Guerraoui and Ron R. Levy and Bastian Pochon and Vivien Qu{\'e}ma", title = "Throughput optimal total order broadcast for cluster environments", journal = j-TOCS, volume = "28", number = "2", pages = "5:1--5:??", month = jul, year = "2010", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1813654.1813656", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jul 22 12:42:28 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Total order broadcast is a fundamental communication primitive that plays a central role in bringing cheap software-based high availability to a wide range of services. This article studies the practical performance of such a primitive on a cluster of homogeneous machines.\par We present LCR, the first throughput optimal uniform total order broadcast protocol. LCR is based on a ring topology. It only relies on point-to-point inter-process communication and has a linear latency with respect to the number of processes. LCR is also fair in the sense that each process has an equal opportunity of having its messages delivered by all processes.\par We benchmark a C implementation of LCR against Spread and JGroups, two of the most widely used group communication packages. 
LCR provides higher throughput than the alternatives, over a large number of scenarios.", acknowledgement = ack-nhfb, articleno = "5", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "cluster computing; replication; software fault-tolerance; total order broadcast", } @Article{Amir:2010:SWM, author = "Yair Amir and Claudiu Danilov and Raluca Mus{\u{a}}loiu-Elefteri and Nilo Rivera", title = "The {SMesh} wireless mesh network", journal = j-TOCS, volume = "28", number = "3", pages = "6:1--6:??", month = sep, year = "2010", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1841313.1841314", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Sep 30 09:01:34 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Wireless mesh networks extend the connectivity range of mobile devices by using multiple access points, some of them connected to the Internet, to create a mesh topology and forward packets over multiple wireless hops. However, the quality of service provided by the mesh is impaired by the delays and disconnections caused by handoffs, as clients move within the area covered by multiple access points. We present the architecture and protocols of SMesh, the first transparent wireless mesh system that offers seamless, fast handoff, supporting real-time applications such as interactive VoIP. The handoff and routing logic is done solely by the access points, and therefore connectivity is attainable by any 802.11 device. In SMesh, the entire mesh network is seen by the mobile clients as a single, omnipresent access point, giving the mobile clients the illusion that they are stationary. We use multicast for access points coordination and, during handoff transitions, we use more than one access point to handle the moving client.
SMesh provides a hybrid routing protocol that optimizes routes over wireless and wired links in a multihomed environment. Experimental results on a fully deployed mesh network demonstrate the effectiveness of the SMesh architecture and its intra-domain and inter-domain handoff protocols.", acknowledgement = ack-nhfb, articleno = "6", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "fast handoff; inter-domain; intra-domain; micromobility; Wireless mesh networks", } @Article{Friedman:2010:PQS, author = "Roy Friedman and Gabriel Kliot and Chen Avin", title = "Probabilistic quorum systems in wireless {Ad Hoc} networks", journal = j-TOCS, volume = "28", number = "3", pages = "7:1--7:??", month = sep, year = "2010", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1841313.1841315", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Sep 30 09:01:34 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Quorums are a basic construct in solving many fundamental distributed computing problems. One of the known ways of making quorums scalable and efficient is by weakening their intersection guarantee to being probabilistic. This article explores several access strategies for implementing probabilistic quorums in ad hoc networks. In particular, we present the first detailed study of asymmetric probabilistic biquorum systems, that allow to mix different access strategies and different quorums sizes, while guaranteeing the desired intersection probability. We show the advantages of asymmetric probabilistic biquorum systems in ad hoc networks. Such an asymmetric construction is also useful for other types of networks with nonuniform access costs (e.g., peer-to-peer networks). The article includes a formal analysis of these approaches backed up by an extensive simulation-based study. 
The study explores the impact of various parameters such as network size, network density, mobility speed, and churn. In particular, we show that one of the strategies that uses random walks exhibits the smallest communication overhead, thus being very attractive for ad hoc networks.", acknowledgement = ack-nhfb, articleno = "7", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", keywords = "Distributed middleware; location service; quorums systems; random walks; wireless ad hoc networks", } @Article{Blagodurov:2010:CAS, author = "Sergey Blagodurov and Sergey Zhuravlev and Alexandra Fedorova", title = "Contention-Aware Scheduling on Multicore Systems", journal = j-TOCS, volume = "28", number = "4", pages = "8:1--8:??", month = dec, year = "2010", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1880018.1880019", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Dec 23 17:06:32 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Contention for shared resources on multicore processors remains an unsolved problem in existing systems despite significant research efforts dedicated to this problem in the past. Previous solutions focused primarily on hardware techniques and software page coloring to mitigate this problem. Our goal is to investigate how and to what extent contention for shared resource can be mitigated via thread scheduling. Scheduling is an attractive tool, because it does not require extra hardware and is relatively easy to integrate into the system. Our study is the first to provide a comprehensive analysis of contention-mitigating techniques that use only scheduling. 
The most difficult part of the problem is to find a classification scheme for threads, which would determine how they affect each other when competing for shared resources.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Cheung:2010:LBC, author = "Alex King Yeung Cheung and Hans-Arno Jacobsen", title = "Load Balancing Content-Based Publish\slash Subscribe Systems", journal = j-TOCS, volume = "28", number = "4", pages = "9:1--9:??", month = dec, year = "2010", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1880018.1880020", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Dec 23 17:06:32 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Distributed content-based publish/subscribe systems suffer from performance degradation and poor scalability caused by uneven load distributions typical in real-world applications. The reason for this shortcoming is the lack of a load balancing scheme. This article proposes a load balancing solution specifically tailored to the needs of content-based publish/subscribe systems that is distributed, dynamic, adaptive, transparent, and accommodates heterogeneity. The solution consists of three key contributions: a load balancing framework, a novel load estimation algorithm, and three offload strategies. 
A working prototype of our solution is built on an open-sourced content-based publish/subscribe system and evaluated on PlanetLab, a cluster testbed, and in simulations.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Kim:2011:SSE, author = "Changhoon Kim and Matthew Caesar and Jennifer Rexford", title = "{SEATTLE}: a {Scalable Ethernet Architecture for Large Enterprises}", journal = j-TOCS, volume = "29", number = "1", pages = "1:1--1:35", month = feb, year = "2011", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1925109.1925110", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Feb 28 16:17:43 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "IP networks today require massive effort to configure and manage. Ethernet is vastly simpler to manage, but does not scale beyond small local area networks. This article describes an alternative network architecture called SEATTLE that achieves the best of both worlds: The scalability of IP combined with the simplicity of Ethernet. SEATTLE provides plug-and-play functionality via flat addressing, while ensuring scalability and efficiency through shortest-path routing and hash-based resolution of host information. In contrast to previous work on identity-based routing, SEATTLE ensures path predictability, controllability, and stability, thus simplifying key network-management operations, such as capacity planning, traffic engineering, and troubleshooting. 
We performed a simulation study driven by real-world traffic traces and network topologies, and used Emulab to evaluate a prototype of our design based on the Click and XORP open-source routing platforms.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Lagar-Cavilla:2011:SVM, author = "H. Andr{\'e}s Lagar-Cavilla and Joseph A. Whitney and Roy Bryant and Philip Patchin and Michael Brudno and Eyal de Lara and Stephen M. Rumble and M. Satyanarayanan and Adin Scannell", title = "{SnowFlock}: Virtual Machine Cloning as a First-Class Cloud Primitive", journal = j-TOCS, volume = "29", number = "1", pages = "2:1--2:45", month = feb, year = "2011", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1925109.1925111", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Feb 28 16:17:43 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "A basic building block of cloud computing is virtualization. Virtual machines (VMs) encapsulate a user's computing environment and efficiently isolate it from that of other users. VMs, however, are large entities, and no clear APIs exist yet to provide users with programmatic, fine-grained control on short time scales. We present SnowFlock, a paradigm and system for cloud computing that introduces VM cloning as a first-class cloud abstraction. VM cloning exploits the well-understood and effective semantics of UNIX fork.
We demonstrate multiple usage models of VM cloning: users can incorporate the primitive in their code, can wrap around existing toolchains via scripting, can encapsulate the API within a parallel programming framework, or can use it to load-balance and self-scale clustered servers.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Meisner:2011:PSA, author = "David Meisner and Brian T. Gold and Thomas F. Wenisch", title = "The {PowerNap} Server Architecture", journal = j-TOCS, volume = "29", number = "1", pages = "3:1--3:24", month = feb, year = "2011", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1925109.1925112", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Feb 28 16:17:43 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Data center power consumption is growing to unprecedented levels: the EPA estimates U.S. data centers will consume 100 billion kilowatt hours annually by 2011. Much of this energy is wasted in idle systems: in typical deployments, server utilization is below 30\%, but idle servers still consume 60\% of their peak power draw. Typical idle periods---though frequent---last seconds or less, confounding simple energy-conservation approaches. In this article, we propose PowerNap, an energy-conservation approach where the entire system transitions rapidly between a high-performance active state and a near-zero-power idle state in response to instantaneous load. 
Rather than requiring fine-grained power-performance states and complex load-proportional operation from individual system components, PowerNap instead calls for minimizing idle power and transition time, which are simpler optimization goals.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Gupta:2011:DTD, author = "Diwaker Gupta and Kashi Venkatesh Vishwanath and Marvin McNett and Amin Vahdat and Ken Yocum and Alex Snoeren and Geoffrey M. Voelker", title = "{DieCast}: Testing Distributed Systems with an Accurate Scale Model", journal = j-TOCS, volume = "29", number = "2", pages = "4:1--4:??", month = may, year = "2011", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1963559.1963560", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon May 9 16:05:59 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Large-scale network services can consist of tens of thousands of machines running thousands of unique software configurations spread across hundreds of physical networks. Testing such services for complex performance problems and configuration errors remains a difficult problem. Existing testing techniques, such as simulation or running smaller instances of a service, have limitations in predicting overall service behavior at such scales. Testing large services should ideally be done at the same scale and configuration as the target deployment, which can be technically and economically infeasible. 
We present DieCast, an approach to scaling network services in which we multiplex all of the nodes in a given service configuration as virtual machines across a much smaller number of physical machines in a test harness.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Yadgar:2011:MMM, author = "Gala Yadgar and Michael Factor and Kai Li and Assaf Schuster", title = "Management of Multilevel, Multiclient Cache Hierarchies with Application Hints", journal = j-TOCS, volume = "29", number = "2", pages = "5:1--5:??", month = may, year = "2011", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1963559.1963561", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon May 9 16:05:59 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Multilevel caching, common in many storage configurations, introduces new challenges to traditional cache management: data must be kept in the appropriate cache and replication avoided across the various cache levels. Additional challenges are introduced when the lower levels of the hierarchy are shared by multiple clients. Sharing can have both positive and negative effects. While data fetched by one client can be used by another client without incurring additional delays, clients competing for cache buffers can evict each other's blocks and interfere with exclusive caching schemes. 
We present a global noncentralized, dynamic and informed management policy for multiple levels of cache, accessed by multiple clients.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{deBruijn:2011:ATS, author = "Willem de Bruijn and Herbert Bos and Henri Bal", title = "Application-Tailored {I/O} with {Streamline}", journal = j-TOCS, volume = "29", number = "2", pages = "6:1--6:??", month = may, year = "2011", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/1963559.1963562", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon May 9 16:05:59 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Streamline is a stream-based OS communication subsystem that spans from peripheral hardware to userspace processes. It improves performance of I/O-bound applications (such as webservers and streaming media applications) by constructing tailor-made I/O paths through the operating system for each application at runtime. Path optimization removes unnecessary copying, context switching and cache replacement and integrates specialized hardware. Streamline automates optimization and only presents users a clear, concise job control language based on Unix pipelines. 
For backward compatibility Streamline also presents well known files, pipes and sockets abstractions.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Ayari:2011:DPR, author = "Brahim Ayari and Abdelmajid Khelil and Neeraj Suri", title = "On the Design of Perturbation-Resilient Atomic Commit Protocols for Mobile Transactions", journal = j-TOCS, volume = "29", number = "3", pages = "7:1--7:??", month = aug, year = "2011", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2003690.2003691", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Aug 24 18:08:12 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Distributed mobile transactions utilize commit protocols to achieve atomicity and consistent decisions. This is challenging, as mobile environments are typically characterized by frequent perturbations such as network disconnections and node failures. On one hand environmental constraints on mobile participants and wireless links may increase the resource blocking time of fixed participants. On the other hand frequent node and link failures complicate the design of atomic commit protocols by increasing both the transaction abort rate and resource blocking time. Hence, the deployment of classical commit protocols (such as two-phase commit) does not reasonably extend to distributed infrastructure-based mobile environments driving the need for perturbation-resilient commit protocols.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Kalibera:2011:SRT, author = "Tomas Kalibera and Filip Pizlo and Antony L.
Hosking and Jan Vitek", title = "Scheduling Real-Time Garbage Collection on Uniprocessors", journal = j-TOCS, volume = "29", number = "3", pages = "8:1--8:??", month = aug, year = "2011", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2003690.2003692", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Aug 24 18:08:12 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Managed languages such as Java and C\# are increasingly being considered for hard real-time applications because of their productivity and software engineering advantages. Automatic memory management, or garbage collection, is a key enabler for robust, reusable libraries, yet remains a challenge for analysis and implementation of real-time execution environments. This article comprehensively compares leading approaches to hard real-time garbage collection. There are many design decisions involved in selecting a real-time garbage collection algorithm. For time-based garbage collectors on uniprocessors one must choose whether to use periodic, slack-based or hybrid scheduling. A significant impediment to valid experimental comparison of such choices is that commercial implementations use completely different proprietary infrastructures.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Reddi:2011:MPE, author = "Vijay Janapa Reddi and Benjamin C.
Lee and Trishul Chilimbi and Kushagra Vaid", title = "Mobile Processors for Energy-Efficient Web Search", journal = j-TOCS, volume = "29", number = "3", pages = "9:1--9:??", month = aug, year = "2011", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2003690.2003693", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Aug 24 18:08:12 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "As cloud and utility computing spreads, computer architects must ensure continued capability growth for the data centers that comprise the cloud. Given megawatt scale power budgets, increasing data center capability requires increasing computing hardware energy efficiency. To increase the data center's capability for work, the work done per Joule must increase. We pursue this efficiency even as the nature of data center applications evolves. Unlike traditional enterprise workloads, which are typically memory or I/O bound, big data computation and analytics exhibit greater compute intensity. This article examines the efficiency of mobile processors as a means for data center capability.
In particular, we compare and contrast the performance and efficiency of the Microsoft Bing search engine executing on the mobile-class Atom processor and the server-class Xeon processor.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Srivatsa:2011:ESA, author = "Mudhakar Srivatsa and Ling Liu and Arun Iyengar", title = "{EventGuard}: a System Architecture for Securing Publish--Subscribe Networks", journal = j-TOCS, volume = "29", number = "4", pages = "10:1--10:??", month = dec, year = "2011", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2063509.2063510", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Dec 30 17:52:02 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Publish-subscribe (pub-sub) is an emerging paradigm for building a large number of distributed systems. A wide area pub-sub system is usually implemented on an overlay network infrastructure to enable information dissemination from publishers to subscribers. Using an open overlay network raises several security concerns such as: confidentiality and integrity, authentication, authorization and Denial-of-Service (DoS) attacks. In this article we present EventGuard, a framework for building secure wide-area pub-sub systems. 
The EventGuard architecture is comprised of three key components: (1) a suite of security guards that can be seamlessly plugged-into a content-based pub-sub system, (2) a scalable key management algorithm to enforce access control on subscribers, and (3) a resilient pub-sub network design that is capable of scalable routing, handling message dropping-based DoS attacks, and node failures.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Marinescu:2011:ETR, author = "Paul D. Marinescu and George Candea", title = "Efficient Testing of Recovery Code Using Fault Injection", journal = j-TOCS, volume = "29", number = "4", pages = "11:1--11:??", month = dec, year = "2011", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2063509.2063511", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Dec 30 17:52:02 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "A critical part of developing a reliable software system is testing its recovery code. This code is traditionally difficult to test in the lab, and, in the field, it rarely gets to run; yet, when it does run, it must execute flawlessly in order to recover the system from failure. In this article, we present a library-level fault injection engine that enables the productive use of fault injection for software testing. 
We describe automated techniques for reliably identifying errors that applications may encounter when interacting with their environment, for automatically identifying high-value injection targets in program binaries, and for producing efficient injection test scenarios.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Mahajan:2011:DCS, author = "Prince Mahajan and Srinath Setty and Sangmin Lee and Allen Clement and Lorenzo Alvisi and Mike Dahlin and Michael Walfish", title = "{Depot}: Cloud Storage with Minimal Trust", journal = j-TOCS, volume = "29", number = "4", pages = "12:1--12:??", month = dec, year = "2011", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2063509.2063512", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Dec 30 17:52:02 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "This article describes the design, implementation, and evaluation of Depot, a cloud storage system that minimizes trust assumptions. Depot tolerates buggy or malicious behavior by any number of clients or servers, yet it provides safety and liveness guarantees to correct clients. Depot provides these guarantees using a two-layer architecture. First, Depot ensures that the updates observed by correct nodes are consistently ordered under Fork-Join-Causal consistency (FJC). FJC is a slight weakening of causal consistency that can be both safe and live despite faulty nodes. 
Second, Depot implements protocols that use this consistent ordering of updates to provide other desirable consistency, staleness, durability, and recovery properties.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Mowry:2012:ISI, author = "Todd C. Mowry", title = "Introduction to Special Issue {ASPLOS 2011}", journal = j-TOCS, volume = "30", number = "1", pages = "1:1--1:??", month = feb, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2110356.2110357", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 1 16:31:38 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", remark = "NOTE(review): title previously read APLOS 2011 here; changed to ASPLOS 2011 on the assumption that APLOS was a typo --- TODO confirm against the printed issue.", } @Article{Chipounov:2012:SPD, author = "Vitaly Chipounov and Volodymyr Kuznetsov and George Candea", title = "The {S2E} Platform: Design, Implementation, and Applications", journal = j-TOCS, volume = "30", number = "1", pages = "2:1--2:??", month = feb, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2110356.2110358", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 1 16:31:38 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "This article presents S2E, a platform for analyzing the properties and behavior of software systems, along with its use in developing tools for comprehensive performance profiling, reverse engineering of proprietary software, and automated testing of kernel-mode and user-mode binaries.
Conceptually, S2E is an automated path explorer with modular path analyzers: the explorer uses a symbolic execution engine to drive the target system down all execution paths of interest, while analyzers measure and/or check properties of each such path. S2E users can either combine existing analyzers to build custom analysis tools, or they can directly use S2E's APIs. S2E's strength is the ability to scale to large systems, such as a full Windows stack, using two new ideas: selective symbolic execution, a way to automatically minimize the amount of code that has to be executed symbolically given a target analysis, and execution consistency models, a way to make principled performance/accuracy trade-offs \ldots{}", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Veeraraghavan:2012:DPS, author = "Kaushik Veeraraghavan and Dongyoon Lee and Benjamin Wester and Jessica Ouyang and Peter M. Chen and Jason Flinn and Satish Narayanasamy", title = "{DoublePlay}: Parallelizing Sequential Logging and Replay", journal = j-TOCS, volume = "30", number = "1", pages = "3:1--3:??", month = feb, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2110356.2110359", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 1 16:31:38 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Deterministic replay systems record and reproduce the execution of a hardware or software system. In contrast to replaying execution on uniprocessors, deterministic replay on multiprocessors is very challenging to implement efficiently because of the need to reproduce the order of or the values read by shared memory operations performed by multiple threads. 
In this paper, we present DoublePlay, a new way to efficiently guarantee replay on commodity multiprocessors. Our key insight is that one can use the simpler and faster mechanisms of single-processor record and replay, yet still achieve the scalability offered by multiple cores, by using an additional execution to parallelize the record and replay of an application.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Yuan:2012:ISD, author = "Ding Yuan and Jing Zheng and Soyeon Park and Yuanyuan Zhou and Stefan Savage", title = "Improving Software Diagnosability via Log Enhancement", journal = j-TOCS, volume = "30", number = "1", pages = "4:1--4:??", month = feb, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2110356.2110360", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 1 16:31:38 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Diagnosing software failures in the field is notoriously difficult, in part due to the fundamental complexity of troubleshooting any complex software system, but further exacerbated by the paucity of information that is typically available in the production setting. Indeed, for reasons of both overhead and privacy, it is common that only the run-time log generated by a system (e.g., syslog) can be shared with the developers. Unfortunately, the ad-hoc nature of such reports are frequently insufficient for detailed failure diagnosis. This paper seeks to improve this situation within the rubric of existing practice. 
We describe a tool, LogEnhancer that automatically ``enhances'' existing logging code to aid in future post-failure debugging.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Schupbach:2012:DLA, author = "Adrian Sch{\"u}pbach and Andrew Baumann and Timothy Roscoe and Simon Peter", title = "A Declarative Language Approach to Device Configuration", journal = j-TOCS, volume = "30", number = "1", pages = "5:1--5:??", month = feb, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2110356.2110361", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 1 16:31:38 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "C remains the language of choice for hardware programming (device drivers, bus configuration, etc.): it is fast, allows low-level access, and is trusted by OS developers. However, the algorithms required to configure and reconfigure hardware devices and interconnects are becoming more complex and diverse, with the added burden of legacy support, ``quirks,'' and hardware bugs to work around. Even programming PCI bridges in a modern PC is a surprisingly complex problem, and is getting worse as new functionality such as hotplug appears. 
Existing approaches use relatively simple algorithms, hard-coded in C and closely coupled with low-level register access code, generally leading to suboptimal configurations.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Saez:2012:LCS, author = "Juan Carlos Saez and Alexandra Fedorova and David Koufaty and Manuel Prieto", title = "Leveraging Core Specialization via {OS} Scheduling to Improve Performance on Asymmetric Multicore Systems", journal = j-TOCS, volume = "30", number = "2", pages = "6:1--6:??", month = apr, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2166879.2166880", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Apr 27 12:10:22 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Asymmetric multicore processors (AMPs) consist of cores with the same ISA (instruction-set architecture), but different microarchitectural features, speed, and power consumption. Because cores with more complex features and higher speed typically use more area and consume more energy relative to simpler and slower cores, we must use these cores for running applications that experience significant performance improvements from using those features. Having cores of different types in a single system allows optimizing the performance/energy trade-off. To deliver this potential to unmodified applications, the OS scheduler must map threads to cores in consideration of the properties of both. Our work describes a Comprehensive scheduler for Asymmetric Multicore Processors (CAMP) that addresses shortcomings of previous asymmetry-aware schedulers. 
First, previous schedulers catered to only one kind of workload properties that are crucial for scheduling on AMPs; either efficiency or thread-level parallelism (TLP), but not both. CAMP overcomes this limitation showing how using both efficiency and TLP in synergy in a single scheduling algorithm can improve performance. Second, most existing schedulers relying on models for estimating how much faster a thread executes on a ``fast'' vs. ``slow'' core (i.e., the speedup factor) were specifically designed for AMP systems where cores differ only in clock frequency. However, more realistic AMP systems include cores that differ more significantly in their features. To demonstrate the effectiveness of CAMP on more realistic scenarios, we augmented the CAMP scheduler with a model that predicts the speedup factor on a real AMP prototype that closely matches future asymmetric systems.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Ebrahimi:2012:FST, author = "Eiman Ebrahimi and Chang Joo Lee and Onur Mutlu and Yale N. Patt", title = "Fairness via Source Throttling: a Configurable and High-Performance Fairness Substrate for Multicore Memory Systems", journal = j-TOCS, volume = "30", number = "2", pages = "7:1--7:??", month = apr, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2166879.2166881", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Apr 27 12:10:22 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Cores in chip-multiprocessors (CMPs) share multiple memory subsystem resources. If resource sharing is unfair, some applications can be delayed significantly while others are unfairly prioritized. Previous research proposed separate fairness mechanisms for each resource.
Such resource-based fairness mechanisms implemented independently in each resource can make contradictory decisions, leading to low fairness and performance loss. Therefore, a coordinated mechanism that provides fairness in the entire shared memory system is desirable. This article proposes a new approach that provides fairness in the entire shared memory system, thereby eliminating the need for and complexity of developing fairness mechanisms for each resource. Our technique, Fairness via Source Throttling (FST), estimates unfairness in the entire memory system. If unfairness is above a system-software-set threshold, FST throttles down cores causing unfairness by limiting the number of requests they create and the frequency at which they do. As such, our source-based fairness control ensures fairness decisions are made in tandem in the entire memory system. FST enforces thread priorities/weights, and enables system-software to enforce different fairness objectives in the memory system. Our evaluations show that FST provides the best system fairness and performance compared to three systems with state-of-the-art fairness mechanisms implemented in both shared caches and memory controllers.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Gebhart:2012:HTS, author = "Mark Gebhart and Daniel R. Johnson and David Tarjan and Stephen W. Keckler and William J. 
Dally and Erik Lindholm and Kevin Skadron", title = "A Hierarchical Thread Scheduler and Register File for Energy-Efficient Throughput Processors", journal = j-TOCS, volume = "30", number = "2", pages = "8:1--8:??", month = apr, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2166879.2166882", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Apr 27 12:10:22 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Modern graphics processing units (GPUs) employ a large number of hardware threads to hide both function unit and memory access latency. Extreme multithreading requires a complex thread scheduler as well as a large register file, which is expensive to access both in terms of energy and latency. We present two complementary techniques for reducing energy on massively-threaded processors such as GPUs. First, we investigate a two-level thread scheduler that maintains a small set of active threads to hide ALU and local memory access latency and a larger set of pending threads to hide main memory latency. Reducing the number of threads that the scheduler must consider each cycle improves the scheduler's energy efficiency. Second, we propose replacing the monolithic register file found on modern designs with a hierarchical register file. We explore various trade-offs for the hierarchy including the number of levels in the hierarchy and the number of entries at each level. We consider both a hardware-managed caching scheme and a software-managed scheme, where the compiler is responsible for orchestrating all data movement within the register file hierarchy. Combined with a hierarchical register file, our two-level thread scheduler provides a further reduction in energy by only allocating entries in the upper levels of the register file hierarchy for active threads. 
Averaging across a variety of real world graphics and compute workloads, the active thread count can be reduced by a factor of 4 with minimal impact on performance and our most efficient three-level software-managed register file hierarchy reduces register file energy by 54\%.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Dall:2012:DIE, author = "Christoffer Dall and Jeremy Andrus and Alexander {Van't Hof} and Oren Laadan and Jason Nieh", title = "The Design, Implementation, and Evaluation of {Cells}: a Virtual {Smartphone} Architecture", journal = j-TOCS, volume = "30", number = "3", pages = "9:1--9:??", month = aug, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2324876.2324877", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Aug 20 16:33:58 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Smartphones are increasingly ubiquitous, and many users carry multiple phones to accommodate work, personal, and geographic mobility needs. We present Cells, a virtualization architecture for enabling multiple virtual smartphones to run simultaneously on the same physical cellphone in an isolated, secure manner. Cells introduces a usage model of having one foreground virtual phone and multiple background virtual phones. This model enables a new device namespace mechanism and novel device proxies that integrate with lightweight operating system virtualization to multiplex phone hardware across multiple virtual phones while providing native hardware device performance. Cells virtual phone features include fully accelerated 3D graphics, complete power management features, and full telephony functionality with separately assignable telephone numbers and caller ID support.
We have implemented a prototype of Cells that supports multiple Android virtual phones on the same phone. Our performance results demonstrate that Cells imposes only modest runtime and memory overhead, works seamlessly across multiple hardware devices including Google Nexus 1 and Nexus S phones, and transparently runs Android applications at native speed without any modifications.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Harter:2012:FFU, author = "Tyler Harter and Chris Dragga and Michael Vaughn and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau", title = "A File Is Not a File: Understanding the {I/O} Behavior of {Apple} Desktop Applications", journal = j-TOCS, volume = "30", number = "3", pages = "10:1--10:??", month = aug, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2324876.2324878", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Aug 20 16:33:58 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "We analyze the I/O behavior of iBench, a new collection of productivity and multimedia application workloads. Our analysis reveals a number of differences between iBench and typical file-system workload studies, including the complex organization of modern files, the lack of pure sequential access, the influence of underlying frameworks on I/O patterns, the widespread use of file synchronization and atomic operations, and the prevalence of threads. 
Our results have strong ramifications for the design of next generation local and cloud-based storage systems.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Esmaeilzadeh:2012:PLD, author = "Hadi Esmaeilzadeh and Emily Blem and Ren{\'e}e {St. Amant} and Karthikeyan Sankaralingam and Doug Burger", title = "Power Limitations and Dark Silicon Challenge the Future of Multicore", journal = j-TOCS, volume = "30", number = "3", pages = "11:1--11:??", month = aug, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2324876.2324879", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Aug 20 16:33:58 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Since 2004, processor designers have increased core counts to exploit Moore's Law scaling, rather than focusing on single-core performance. The failure of Dennard scaling, to which the shift to multicore parts is partially a response, may soon limit multicore scaling just as single-core scaling has been curtailed. This paper models multicore scaling limits by combining device scaling, single-core scaling, and multicore scaling to measure the speedup potential for a set of parallel workloads for the next five technology generations. For device scaling, we use both the ITRS projections and a set of more conservative device scaling parameters. To model single-core scaling, we combine measurements from over 150 processors to derive Pareto-optimal frontiers for area/performance and power/performance. Finally, to model multicore scaling, we build a detailed performance model of upper-bound performance and lower-bound core power. 
The multicore designs we study include single-threaded CPU-like and massively threaded GPU-like multicore chip organizations with symmetric, asymmetric, dynamic, and composed topologies. The study shows that regardless of chip organization and topology, multicore scaling is power limited to a degree not widely appreciated by the computing community. Even at 22 nm (just one year from now), 21\% of a fixed-size chip must be powered off, and at 8 nm, this number grows to more than 50\%. Through 2024, only 7.9$\times$ average speedup is possible across commonly used parallel workloads for the topologies we study, leaving a nearly 24-fold gap from a target of doubled performance per generation.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Bugnion:2012:BVX, author = "Edouard Bugnion and Scott Devine and Mendel Rosenblum and Jeremy Sugerman and Edward Y. Wang", title = "Bringing Virtualization to the x86 Architecture with the Original {VMware} Workstation", journal = j-TOCS, volume = "30", number = "4", pages = "12:1--12:51", month = nov, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2382553.2382554", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Nov 29 19:34:49 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "This article describes the historical context, technical challenges, and main implementation techniques used by VMware Workstation to bring virtualization to the x86 architecture in 1999. Although virtual machine monitors (VMMs) had been around for decades, they were traditionally designed as part of monolithic, single-vendor architectures with explicit support for virtualization. 
In contrast, the x86 architecture lacked virtualization support, and the industry around it had disaggregated into an ecosystem, with different vendors controlling the computers, CPUs, peripherals, operating systems, and applications, none of them asking for virtualization. We chose to build our solution independently of these vendors. As a result, VMware Workstation had to deal with new challenges associated with (i) the lack of virtualization support in the x86 architecture, (ii) the daunting complexity of the architecture itself, (iii) the need to support a broad combination of peripherals, and (iv) the need to offer a simple user experience within existing environments. These new challenges led us to a novel combination of well-known virtualization techniques, techniques from other domains, and new techniques. VMware Workstation combined a hosted architecture with a VMM. The hosted architecture enabled a simple user experience and offered broad hardware compatibility. Rather than exposing I/O diversity to the virtual machines, VMware Workstation also relied on software emulation of I/O devices. The VMM combined a trap-and-emulate direct execution engine with a system-level dynamic binary translator to efficiently virtualize the x86 architecture and support most commodity operating systems. By relying on x86 hardware segmentation as a protection mechanism, the binary translator could execute translated code at near hardware speeds. The binary translator also relied on partial evaluation and adaptive retranslation to reduce the overall overheads of virtualization. 
Written with the benefit of hindsight, this article shares the key lessons we learned from building the original system and from its later evolution.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Erlingsson:2012:FED, author = "{\'U}lfar Erlingsson and Marcus Peinado and Simon Peter and Mihai Budiu and Gloria Mainar-Ruiz", title = "{Fay}: Extensible Distributed Tracing from Kernels to Clusters", journal = j-TOCS, volume = "30", number = "4", pages = "13:1--13:??", month = nov, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2382553.2382555", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Nov 29 19:34:49 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Fay is a flexible platform for the efficient collection, processing, and analysis of software execution traces. Fay provides dynamic tracing through use of runtime instrumentation and distributed aggregation within machines and across clusters. At the lowest level, Fay can be safely extended with new tracing primitives, including even untrusted, fully optimized machine code, and Fay can be applied to running user-mode or kernel-mode software without compromising system stability. At the highest level, Fay provides a unified, declarative means of specifying what events to trace, as well as the aggregation, processing, and analysis of those events. We have implemented the Fay tracing platform for Windows and integrated it with two powerful, expressive systems for distributed programming. Our implementation is easy to use, can be applied to unmodified production systems, and provides primitives that allow the overhead of tracing to be greatly reduced, compared to previous dynamic tracing platforms. 
To show the generality of Fay tracing, we reimplement, in experiments, a range of tracing strategies and several custom mechanisms from existing tracing frameworks. Fay shows that modern techniques for high-level querying and data-parallel processing of disaggregated data streams are well suited to comprehensive monitoring of software execution in distributed systems. Revisiting a lesson from the late 1960s [Deutsch and Grant 1971], Fay also demonstrates the efficiency and extensibility benefits of using safe, statically verified machine code as the basis for low-level execution tracing. Finally, Fay establishes that, by automatically deriving optimized query plans and code for safe extensions, the expressiveness and performance of high-level tracing queries can equal or even surpass that of specialized monitoring tools.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Gandhi:2012:ADR, author = "Anshul Gandhi and Mor Harchol-Balter and Ram Raghunathan and Michael A. Kozuch", title = "{AutoScale}: Dynamic, Robust Capacity Management for Multi-Tier Data Centers", journal = j-TOCS, volume = "30", number = "4", pages = "14:1--14:??", month = nov, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2382553.2382556", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Nov 29 19:34:49 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Energy costs for data centers continue to rise, already exceeding \$15 billion yearly. Sadly much of this power is wasted. Servers are only busy 10--30\% of the time on average, but they are often left on, while idle, utilizing 60\% or more of peak power when in the idle state. 
We introduce a dynamic capacity management policy, AutoScale, that greatly reduces the number of servers needed in data centers driven by unpredictable, time-varying load, while meeting response time SLAs. AutoScale scales the data center capacity, adding or removing servers as needed. AutoScale has two key features: (i) it autonomically maintains just the right amount of spare capacity to handle bursts in the request rate; and (ii) it is robust not just to changes in the request rate of real-world traces, but also request size and server efficiency. We evaluate our dynamic capacity management approach via implementation on a 38-server multi-tier data center, serving a web site of the type seen in Facebook or Amazon, with a key-value store workload. We demonstrate that AutoScale vastly improves upon existing dynamic capacity management policies with respect to meeting SLAs and robustness.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Ferdman:2012:QMB, author = "Michael Ferdman and Almutaz Adileh and Onur Kocberber and Stavros Volos and Mohammad Alisafaee and Djordje Jevdjic and Cansu Kaynak and Adrian Daniel Popescu and Anastasia Ailamaki and Babak Falsafi", title = "Quantifying the Mismatch between Emerging Scale-Out Applications and Modern Processors", journal = j-TOCS, volume = "30", number = "4", pages = "15:1--15:??", month = nov, year = "2012", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2382553.2382557", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Nov 29 19:34:49 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Emerging scale-out workloads require extensive amounts of computational resources. 
However, data centers using modern server hardware face physical constraints in space and power, limiting further expansion and calling for improvements in the computational density per server and in the per-operation energy. Continuing to improve the computational resources of the cloud while staying within physical constraints mandates optimizing server efficiency to ensure that server hardware closely matches the needs of scale-out workloads. In this work, we introduce CloudSuite, a benchmark suite of emerging scale-out workloads. We use performance counters on modern servers to study scale-out workloads, finding that today's predominant processor microarchitecture is inefficient for running these workloads. We find that inefficiency comes from the mismatch between the workload needs and modern processors, particularly in the organization of instruction and data memory systems and the processor core microarchitecture. Moreover, while today's predominant microarchitecture is inefficient when executing scale-out workloads, we find that continuing the current trends will further exacerbate the inefficiency in the future. 
In this work, we identify the key microarchitectural needs of scale-out workloads, calling for a change in the trajectory of server processors that would lead to improved computational density and power efficiency in data centers.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Wu:2013:ERD, author = "Meng-Ju Wu and Donald Yeung", title = "Efficient Reuse Distance Analysis of Multicore Scaling for Loop-Based Parallel Programs", journal = j-TOCS, volume = "31", number = "1", pages = "1:1--1:??", month = feb, year = "2013", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2427631.2427632", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Feb 23 06:37:57 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Reuse Distance (RD) analysis is a powerful memory analysis tool that can potentially help architects study multicore processor scaling. One key obstacle, however, is that multicore RD analysis requires measuring Concurrent Reuse Distance (CRD) and Private-LRU-stack Reuse Distance (PRD) profiles across thread-interleaved memory reference streams. Sensitivity to memory interleaving makes CRD and PRD profiles architecture dependent, preventing them from analyzing different processor configurations. For loop-based parallel programs, CRD and PRD profiles shift coherently across RD values with core count scaling because interleaving threads are symmetric. Simple techniques can predict such shifting, making the analysis of numerous multicore configurations from a small set of CRD and PRD profiles feasible. Given the ubiquity of parallel loops, such techniques will be extremely valuable for studying future large multicore designs. 
This article investigates using RD analysis to efficiently analyze multicore cache performance for loop-based parallel programs, making several contributions. First, we provide an in-depth analysis on how CRD and PRD profiles change with core count scaling. Second, we develop techniques to predict CRD and PRD profile scaling, in particular employing reference groups [Zhong et al. 2003] to predict coherent shift, demonstrating 90\% or greater prediction accuracy. Third, our CRD and PRD profile analyses define two application parameters with architectural implications: C$_{core}$ is the minimum shared cache capacity that ``contains'' locality degradation due to core count scaling, and C$_{share}$ is the capacity at which shared caches begin to provide a cache-miss reduction compared to private caches. And fourth, we apply CRD and PRD profiles to analyze multicore cache performance. When combined with existing problem scaling prediction, our techniques can predict shared LLC MPKI (private L2 cache MPKI) to within 10.7\% (13.9\%) of simulation across 1,728 (1,440) configurations using only 36 measured CRD (PRD) profiles.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Govindan:2013:ADP, author = "Sriram Govindan and Di Wang and Anand Sivasubramaniam and Bhuvan Urgaonkar", title = "Aggressive Datacenter Power Provisioning with Batteries", journal = j-TOCS, volume = "31", number = "1", pages = "2:1--2:??", month = feb, year = "2013", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2427631.2427633", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Feb 23 06:37:57 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Datacenters spend \$10--\$25 per watt in provisioning their power infrastructure, regardless of the watts actually 
consumed. Since peak power needs arise rarely, provisioning power infrastructure for them can be expensive. One can, thus, aggressively underprovision infrastructure assuming that simultaneous peak draw across all equipment will happen rarely. The resulting nonzero probability of emergency events where power needs exceed provisioned capacity, however small, mandates graceful reaction mechanisms to cap the power draw instead of leaving it to disruptive circuit breakers/fuses. Existing strategies for power capping use temporal knobs local to a server that throttle the rate of execution (using power modes), and/or spatial knobs that redirect/migrate excess load to regions of the datacenter with more power headroom. We show these mechanisms to have performance degrading ramifications, and propose an entirely orthogonal solution that leverages existing UPS batteries to temporarily augment the utility supply during emergencies. We build an experimental prototype to demonstrate such power capping on a cluster of 8 servers, each with an individual battery, and implement several online heuristics in the context of different datacenter workloads to evaluate their effectiveness in handling power emergencies. We show that our battery-based solution can: (i) handle emergencies of short durations on its own, (ii) supplement existing reaction mechanisms to enhance their efficacy for longer emergencies, and (iii) create more slack for shifting applications temporarily to nonpeak durations.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Rasmussen:2013:TBE, author = "Alexander Rasmussen and George Porter and Michael Conley and Harsha V. 
Madhyastha and Radhika Niranjan Mysore and Alexander Pucher and Amin Vahdat", title = "{TritonSort}: a Balanced and Energy-Efficient Large-Scale Sorting System", journal = j-TOCS, volume = "31", number = "1", pages = "3:1--3:??", month = feb, year = "2013", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2427631.2427634", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Feb 23 06:37:57 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "We present TritonSort, a highly efficient, scalable sorting system. It is designed to process large datasets, and has been evaluated against as much as 100TB of input data spread across 832 disks in 52 nodes at a rate of 0.938TB/min. When evaluated against the annual Indy GraySort sorting benchmark, TritonSort is 66\% better in absolute performance and has over six times the per-node throughput of the previous record holder. When evaluated against the 100TB Indy JouleSort benchmark, TritonSort sorted 9703 records/Joule. In this article, we describe the hardware and software architecture necessary to operate TritonSort at this level of efficiency. Through careful management of system resources to ensure cross-resource balance, we are able to sort data at approximately 80\% of the disks' aggregate sequential write speed. We believe the work holds a number of lessons for balanced system design and for scale-out architectures in general. While many interesting systems are able to scale linearly with additional servers, per-server performance can lag behind per-server capacity by more than an order of magnitude. 
Bridging the gap between high scalability and high performance would enable either significantly less expensive systems that are able to do the same work or provide the ability to address significantly larger problem sets with the same infrastructure.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Jayaram:2013:PCB, author = "K. R. Jayaram and Patrick Eugster and Chamikara Jayalath", title = "Parametric Content-Based Publish\slash Subscribe", journal = j-TOCS, volume = "31", number = "2", pages = "4:1--4:??", month = may, year = "2013", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2465346.2465347", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Jun 1 11:24:04 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Content-based publish/subscribe (CPS) is an appealing abstraction for building scalable distributed systems, e.g., message boards, intrusion detectors, or algorithmic stock trading platforms. Recently, CPS extensions have been proposed for location-based services like vehicular networks, mobile social networking, and so on. Although current CPS middleware systems are dynamic in the way they support the joining and leaving of publishers and subscribers, they fall short in supporting subscription adaptations. These are becoming increasingly important across many CPS applications. In algorithmic high frequency trading, for instance, stock price thresholds that are of interest to a trader change rapidly, and gains directly hinge on the reaction time to relevant fluctuations rather than fixed values. In location-aware applications, a subscription is a function of the subscriber location (e.g. GPS coordinates), which inherently changes during motion. 
The common solution for adapting a subscription consists of a resubscription, where a new subscription is issued and the superseded one canceled. This incurs substantial overhead in CPS middleware systems, and leads to missed or duplicated events during the transition. In this article, we explore the concept of parametric subscriptions for capturing subscription adaptations. We discuss desirable and feasible guarantees for corresponding support, and propose novel algorithms for updating routing mechanisms effectively and efficiently in classic decentralized CPS broker overlay networks. Compared to resubscriptions, our algorithms significantly improve the reaction time to subscription updates without hampering throughput or latency under high update rates. We also propose and evaluate approximation techniques to detect and mitigate pathological cases of high frequency subscription oscillations, which could significantly decrease the throughput of CPS systems thereby affecting other subscribers. 
We analyze the benefits of our support through implementations of our algorithms in two CPS systems, and by evaluating our algorithms on two different application scenarios.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Smaldone:2013:OSP, author = "Stephen Smaldone and Benjamin Gilbert and Jan Harkes and Liviu Iftode and Mahadev Satyanarayanan", title = "Optimizing Storage Performance for {VM}-Based Mobile Computing", journal = j-TOCS, volume = "31", number = "2", pages = "5:1--5:??", month = may, year = "2013", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2465346.2465348", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Jun 1 11:24:04 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "This article investigates the transient use of free local storage for improving performance in VM-based mobile computing systems implemented as thick clients on host PCs. We use the term TransientPC systems to refer to these types of systems. The solution we propose, called TransPart, uses the higher-performing local storage of host hardware to speed up performance-critical operations. Our solution constructs a virtual storage device on demand (which we call transient storage) by borrowing free disk blocks from the host's storage. In this article, we present the design, implementation, and evaluation of a TransPart prototype, which requires no modifications to the software or hardware of a host computer. 
Experimental results confirm that TransPart offers low overhead and startup cost, while improving user experience.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Lee:2013:ETB, author = "Yunsup Lee and Rimas Avizienis and Alex Bishara and Richard Xia and Derek Lockhart and Christopher Batten and Krste Asanovi{\'c}", title = "Exploring the Tradeoffs between Programmability and Efficiency in Data-Parallel Accelerators", journal = j-TOCS, volume = "31", number = "3", pages = "6:1--6:??", month = aug, year = "2013", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2491464", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Aug 28 17:03:36 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "We present a taxonomy and modular implementation approach for data-parallel accelerators, including the MIMD, vector-SIMD, subword-SIMD, SIMT, and vector-thread (VT) architectural design patterns. We introduce Maven, a new VT microarchitecture based on the traditional vector-SIMD microarchitecture, that is considerably simpler to implement and easier to program than previous VT designs. Using an extensive design-space exploration of full VLSI implementations of many accelerator design points, we evaluate the varying tradeoffs between programmability and implementation efficiency among the MIMD, vector-SIMD, and VT patterns on a workload of compiled microbenchmarks and application kernels. We find the vector cores provide greater efficiency than the MIMD cores, even on fairly irregular kernels. 
Our results suggest that the Maven VT microarchitecture is superior to the traditional vector-SIMD architecture, providing both greater efficiency and easier programmability.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Gamage:2013:PRO, author = "Sahan Gamage and Ramana Rao Kompella and Dongyan Xu and Ardalan Kangarlou", title = "Protocol Responsibility Offloading to Improve {TCP} Throughput in Virtualized Environments", journal = j-TOCS, volume = "31", number = "3", pages = "7:1--7:??", month = aug, year = "2013", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2491463", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Aug 28 17:03:36 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Virtualization is a key technology that powers cloud computing platforms such as Amazon EC2. Virtual machine (VM) consolidation, where multiple VMs share a physical host, has seen rapid adoption in practice, with increasingly large numbers of VMs per machine and per CPU core. Our investigations, however, suggest that the increasing degree of VM consolidation has serious negative effects on the VMs' TCP performance. As multiple VMs share a given CPU, the scheduling latencies, which can be in the order of tens of milliseconds, substantially increase the typically submillisecond round-trip times (RTTs) for TCP connections in a datacenter, causing significant degradation in throughput. In this article, we propose a lightweight solution, called vPRO, that (a) offloads the VM's TCP congestion control function to the driver domain to improve TCP transmit performance; and (b) offloads TCP acknowledgment functionality to the driver domain to improve the TCP receive performance. 
Our evaluation of a vPRO prototype on Xen suggests that vPRO substantially improves TCP receive and transmit throughputs with minimal per-packet CPU overhead. We further show that the higher TCP throughput leads to improvement in application-level performance, via experiments with Apache Olio, a Web 2.0 cloud application, and Intel MPI benchmark.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Corbett:2013:SGG, author = "James C. Corbett and Jeffrey Dean and Michael Epstein and Andrew Fikes and Christopher Frost and J. J. Furman and Sanjay Ghemawat and Andrey Gubarev and Christopher Heiser and Peter Hochschild and Wilson Hsieh and Sebastian Kanthak and Eugene Kogan and Hongyi Li and Alexander Lloyd and Sergey Melnik and David Mwaura and David Nagle and Sean Quinlan and Rajesh Rao and Lindsay Rolig and Yasushi Saito and Michal Szymaniak and Christopher Taylor and Ruth Wang and Dale Woodford", title = "{Spanner}: {Google}'s Globally Distributed Database", journal = j-TOCS, volume = "31", number = "3", pages = "8:1--8:??", month = aug, year = "2013", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2491245", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Aug 28 17:03:36 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Spanner is Google's scalable, multiversion, globally distributed, and synchronously replicated database. It is the first system to distribute data at global scale and support externally-consistent distributed transactions. This article describes how Spanner is structured, its feature set, the rationale underlying various design decisions, and a novel time API that exposes clock uncertainty. 
This API and its implementation are critical to supporting external consistency and a variety of powerful features: nonblocking reads in the past, lock-free snapshot transactions, and atomic schema changes, across all of Spanner.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Mowry:2013:E, author = "Todd C. Mowry", title = "Editorial", journal = j-TOCS, volume = "31", number = "4", pages = "9:1--9:??", month = dec, year = "2013", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2542150.2542151", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Dec 17 17:17:06 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Balakrishnan:2013:CDS, author = "Mahesh Balakrishnan and Dahlia Malkhi and John D. Davis and Vijayan Prabhakaran and Michael Wei and Ted Wobber", title = "{CORFU}: a distributed shared log", journal = j-TOCS, volume = "31", number = "4", pages = "10:1--10:??", month = dec, year = "2013", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2535930", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Dec 17 17:17:06 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "CORFU is a global log which clients can append-to and read-from over a network. Internally, CORFU is distributed over a cluster of machines in such a way that there is no single I/O bottleneck to either appends or reads. Data is fully replicated for fault tolerance, and a modest cluster of about 16--32 machines with SSD drives can sustain 1 million 4-KByte operations per second. 
The CORFU log enabled the construction of a variety of distributed applications that require strong consistency at high speeds, such as databases, transactional key-value stores, replicated state machines, and metadata services.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Bojnordi:2013:PMC, author = "Mahdi Nazm Bojnordi and Engin Ipek", title = "A programmable memory controller for the {DDRx} interfacing standards", journal = j-TOCS, volume = "31", number = "4", pages = "11:1--11:??", month = dec, year = "2013", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2534845", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Dec 17 17:17:06 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Modern memory controllers employ sophisticated address mapping, command scheduling, and power management optimizations to alleviate the adverse effects of DRAM timing and resource constraints on system performance. A promising way of improving the versatility and efficiency of these controllers is to make them programmable---a proven technique that has seen wide use in other control tasks, ranging from DMA scheduling to NAND Flash and directory control. Unfortunately, the stringent latency and throughput requirements of modern DDRx devices have rendered such programmability largely impractical, confining DDRx controllers to fixed-function hardware. This article presents the instruction set architecture (ISA) and hardware implementation of PARDIS, a programmable memory controller that can meet the performance requirements of a high-speed DDRx interface. The proposed controller is evaluated by mapping previously proposed DRAM scheduling, address mapping, refresh scheduling, and power management algorithms onto PARDIS. 
Simulation results show that the average performance of PARDIS comes within 8\% of fixed-function hardware for each of these techniques; moreover, by enabling application-specific optimizations, PARDIS improves system performance by 6 to 17\% and reduces DRAM energy by 9 to 22\% over four existing memory controllers.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Delimitrou:2013:QAS, author = "Christina Delimitrou and Christos Kozyrakis", title = "{QoS}-Aware scheduling in heterogeneous datacenters with {Paragon}", journal = j-TOCS, volume = "31", number = "4", pages = "12:1--12:??", month = dec, year = "2013", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2556583", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Dec 17 17:17:06 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Large-scale datacenters (DCs) host tens of thousands of diverse applications each day. However, interference between colocated workloads and the difficulty of matching applications to one of the many hardware platforms available can degrade performance, violating the quality of service (QoS) guarantees that many cloud workloads require. While previous work has identified the impact of heterogeneity and interference, existing solutions are computationally intensive, cannot be applied online, and do not scale beyond a few applications. We present Paragon, an online and scalable DC scheduler that is heterogeneity- and interference-aware. Paragon is derived from robust analytical methods, and instead of profiling each application in detail, it leverages information the system already has about applications it has previously seen. 
It uses collaborative filtering techniques to quickly and accurately classify an unknown incoming workload with respect to heterogeneity and interference in multiple shared resources. It does so by identifying similarities to previously scheduled applications. The classification allows Paragon to greedily schedule applications in a manner that minimizes interference and maximizes server utilization. After the initial application placement, Paragon monitors application behavior and adjusts the scheduling decisions at runtime to avoid performance degradations. Additionally, we design ARQ, a multiclass admission control protocol that constrains application waiting time. ARQ queues applications in separate classes based on the type of resources they need and avoids long queueing delays for easy-to-satisfy workloads in highly-loaded scenarios. Paragon scales to tens of thousands of servers and applications with marginal scheduling overheads in terms of time or state. We evaluate Paragon with a wide range of workload scenarios, on both small and large-scale systems, including 1,000 servers on EC2. For a 2,500-workload scenario, Paragon enforces performance guarantees for 91\% of applications, while significantly improving utilization. In comparison, heterogeneity-oblivious, interference-oblivious, and least-loaded schedulers only provide similar guarantees for 14\%, 11\%, and 3\% of workloads. 
The differences are more striking in oversubscribed scenarios where resource efficiency is more critical.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Silberstein:2014:GIF, author = "Mark Silberstein and Bryan Ford and Idit Keidar and Emmett Witchel", title = "{GPUfs}: Integrating a file system with {GPUs}", journal = j-TOCS, volume = "32", number = "1", pages = "1:1--1:??", month = feb, year = "2014", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2553081", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Feb 27 12:15:46 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "As GPU hardware becomes increasingly general-purpose, it is quickly outgrowing the traditional, constrained GPU-as-coprocessor programming model. This article advocates for extending standard operating system services and abstractions to GPUs in order to facilitate program development and enable harmonious integration of GPUs in computing systems. As an example, we describe the design and implementation of GPUfs, a software layer which provides operating system support for accessing host files directly from GPU programs. GPUfs provides a POSIX-like API, exploits GPU parallelism for efficiency, and optimizes GPU file access by extending the host CPU's buffer cache into GPU memory. Our experiments, based on a set of real benchmarks adapted to use our file system, demonstrate the feasibility and benefits of the GPUfs approach. 
For example, a self-contained GPU program that searches for a set of strings throughout the Linux kernel source tree runs over seven times faster than on an eight-core CPU.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Klein:2014:CFV, author = "Gerwin Klein and June Andronick and Kevin Elphinstone and Toby Murray and Thomas Sewell and Rafal Kolanski and Gernot Heiser", title = "Comprehensive formal verification of an {OS} microkernel", journal = j-TOCS, volume = "32", number = "1", pages = "2:1--2:??", month = feb, year = "2014", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2560537", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Feb 27 12:15:46 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "We present an in-depth coverage of the comprehensive machine-checked formal verification of seL4, a general-purpose operating system microkernel. We discuss the kernel design we used to make its verification tractable. We then describe the functional correctness proof of the kernel's C implementation and we cover further steps that transform this result into a comprehensive formal verification of the kernel: a formally verified IPC fastpath, a proof that the binary code of the kernel correctly implements the C semantics, a proof of correct access-control enforcement, a proof of information-flow noninterference, a sound worst-case execution time analysis of the binary, and an automatic initialiser for user-level systems that connects kernel-level access-control enforcement with reasoning about system behaviour. We summarise these results and show how they integrate to form a coherent overall analysis, backed by machine-checked, end-to-end theorems. 
The seL4 microkernel is currently not just the only general-purpose operating system kernel that is fully formally verified to this degree. It is also the only example of formal proof of this scale that is kept current as the requirements, design and implementation of the system evolve over almost a decade. We report on our experience in maintaining this evolving formally verified code base.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Guevara:2014:MMM, author = "Marisabel Guevara and Benjamin Lubin and Benjamin C. Lee", title = "Market mechanisms for managing datacenters with heterogeneous microarchitectures", journal = j-TOCS, volume = "32", number = "1", pages = "3:1--3:??", month = feb, year = "2014", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2541258", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Feb 27 12:15:46 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Specialization of datacenter resources brings performance and energy improvements in response to the growing scale and diversity of cloud applications. Yet heterogeneous hardware adds complexity and volatility to latency-sensitive applications. A resource allocation mechanism that leverages architectural principles can overcome both of these obstacles. We integrate research in heterogeneous architectures with recent advances in multi-agent systems. Embedding architectural insight into proxies that bid on behalf of applications, a market effectively allocates hardware to applications with diverse preferences and valuations. Exploring a space of heterogeneous datacenter configurations, which mix server-class Xeon and mobile-class Atom processors, we find an optimal heterogeneous balance that improves both welfare and energy-efficiency. 
We further design and evaluate twelve design points along the Xeon-to-Atom spectrum, and find that a mix of three processor architectures achieves a $ 12 \times $ reduction in response time violations relative to equal-power homogeneous systems.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Palix:2014:FL, author = "Nicolas Palix and Gael Thomas and Suman Saha and Christophe Calv{\`e}s and Gilles Muller and Julia Lawall", title = "Faults in {Linux 2.6}", journal = j-TOCS, volume = "32", number = "2", pages = "4:1--4:??", month = jun, year = "2014", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2619090", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jul 7 16:54:52 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", abstract = "In August 2011, Linux entered its third decade. Ten years before, Chou et al. published a study of faults found by applying a static analyzer to Linux versions 1.0 through 2.4.1. A major result of their work was that the drivers directory contained up to 7 times more of certain kinds of faults than other directories. This result inspired numerous efforts on improving the reliability of driver code. Today, Linux is used in a wider range of environments, provides a wider range of services, and has adopted a new development and release model. What has been the impact of these changes on code quality? To answer this question, we have transported Chou et al.'s experiments to all versions of Linux 2.6 released between 2003 and 2011. We find that Linux has more than doubled in size during this period, but the number of faults per line of code has been decreasing. 
Moreover, the fault rate of drivers is now below that of other directories, such as arch. These results can guide further development and research efforts for the decade to come. To allow updating these results as Linux evolves, we define our experimental protocol and make our checkers available.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Enck:2014:TIF, author = "William Enck and Peter Gilbert and Seungyeop Han and Vasant Tendulkar and Byung-Gon Chun and Landon P. Cox and Jaeyeon Jung and Patrick McDaniel and Anmol N. Sheth", title = "{TaintDroid}: an Information-Flow Tracking System for Realtime Privacy Monitoring on {Smartphones}", journal = j-TOCS, volume = "32", number = "2", pages = "5:1--5:??", month = jun, year = "2014", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2619091", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jul 7 16:54:52 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Today's smartphone operating systems frequently fail to provide users with visibility into how third-party applications collect and share their private data. We address these shortcomings with TaintDroid, an efficient, system-wide dynamic taint tracking and analysis system capable of simultaneously tracking multiple sources of sensitive data. TaintDroid enables realtime analysis by leveraging Android's virtualized execution environment. TaintDroid incurs only 32\% performance overhead on a CPU-bound microbenchmark and imposes negligible overhead on interactive third-party applications. 
Using TaintDroid to monitor the behavior of 30 popular third-party Android applications, in our 2010 study we found 20 applications potentially misused users' private information; so did a similar fraction of the tested applications in our 2012 study. Monitoring the flow of privacy-sensitive data with TaintDroid provides valuable input for smartphone users and security service firms seeking to identify misbehaving applications.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Yu:2014:OBS, author = "Young Jin Yu and Dong In Shin and Woong Shin and Nae Young Song and Jae Woo Choi and Hyeong Seog Kim and Hyeonsang Eom and Heon Young Yeom", title = "Optimizing the Block {I/O} Subsystem for Fast Storage Devices", journal = j-TOCS, volume = "32", number = "2", pages = "6:1--6:??", month = jun, year = "2014", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2619092", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jul 7 16:54:52 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Fast storage devices are an emerging solution to satisfy data-intensive applications. They provide high transaction rates for DBMS, low response times for Web servers, instant on-demand paging for applications with large memory footprints, and many similar advantages for performance-hungry applications. In spite of the benefits promised by fast hardware, modern operating systems are not yet structured to take advantage of the hardware's full potential. The software overhead caused by an OS, negligible in the past, adversely impacts application performance, lessening the advantage of using such hardware. 
Our analysis demonstrates that the overheads from the traditional storage-stack design are significant and cannot easily be overcome without modifying the hardware interface and adding new capabilities to the operating system. In this article, we propose six optimizations that enable an OS to fully exploit the performance characteristics of fast storage devices. With the support of new hardware interfaces, our optimizations minimize per-request latency by streamlining the I/O path and amortize per-request latency by maximizing parallelism inside the device. We demonstrate the impact on application performance through well-known storage benchmarks run against a Linux kernel with a customized SSD. We find that eliminating context switches in the I/O path decreases the software overhead of an I/O request from 20 microseconds to 5 microseconds and a new request merge scheme called Temporal Merge enables the OS to achieve 87\% to 100\% of peak device performance, regardless of request access patterns or types. Although the performance improvement by these optimizations on a standard SATA-based SSD is marginal (because of its limited interface and relatively high response times), our sensitivity analysis suggests that future SSDs with lower response times will benefit from these changes. The effectiveness of our optimizations encourages discussion between the OS community and storage vendors about future device interfaces for fast storage devices.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Samadi:2014:SPS, author = "Mehrzad Samadi and Janghaeng Lee and D. 
Anoushe Jamshidi and Scott Mahlke and Amir Hormati", title = "Scaling Performance via Self-Tuning Approximation for Graphics Engines", journal = j-TOCS, volume = "32", number = "3", pages = "7:1--7:??", month = sep, year = "2014", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2631913", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 21 07:18:28 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Approximate computing, where computation accuracy is traded off for better performance or higher data throughput, is one solution that can help data processing keep pace with the current and growing abundance of information. For particular domains, such as multimedia and learning algorithms, approximation is commonly used today. We consider automation to be essential to provide transparent approximation, and we show that larger benefits can be achieved by constructing the approximation techniques to fit the underlying hardware. Our target platform is the GPU because of its high performance capabilities and difficult programming challenges that can be alleviated with proper automation. Our approach --- SAGE --- combines a static compiler that automatically generates a set of CUDA kernels with varying levels of approximation with a runtime system that iteratively selects among the available kernels to achieve speedup while adhering to a target output quality set by the user. The SAGE compiler employs three optimization techniques to generate approximate kernels that exploit the GPU microarchitecture: selective discarding of atomic operations, data packing, and thread fusion. 
Across a set of machine learning and image processing kernels, SAGE's approximation yields an average of 2.5$ \times $ speedup with less than 10\% quality loss compared to the accurate execution on a NVIDIA GTX 560 GPU.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Wu:2014:EAH, author = "Lisa Wu and Orestis Polychroniou and Raymond J. Barker and Martha A. Kim and Kenneth A. Ross", title = "Energy Analysis of Hardware and Software Range Partitioning", journal = j-TOCS, volume = "32", number = "3", pages = "8:1--8:??", month = sep, year = "2014", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2638550", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 21 07:18:28 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Data partitioning is a critical operation for manipulating large datasets because it subdivides tasks into pieces that are more amenable to efficient processing. It is often the limiting factor in database performance and represents a significant fraction of the overall runtime of large data queries. This article measures the performance and energy of state-of-the-art software partitioners, and describes and evaluates a hardware range partitioner that further improves efficiency. The software implementation is broken into two phases, allowing separate analysis of the partition function computation and data shuffling costs. Although range partitioning is commonly thought to be more expensive than simpler strategies such as hash partitioning, our measurements indicate that careful data movement and optimization of the partition function can allow it to approach the throughput and energy consumption of hash or radix partitioning. 
For further acceleration, we describe a hardware range partitioner, or HARP, a streaming framework that offers a seamless execution environment for this and other streaming accelerators, and a detailed analysis of a 32nm physical design that matches the throughput of four to eight software threads while consuming just 6.9\% of the area and 4.3\% of the power of a Xeon core in the same technology generation.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Sampson:2014:ASS, author = "Adrian Sampson and Jacob Nelson and Karin Strauss and Luis Ceze", title = "Approximate Storage in Solid-State Memories", journal = j-TOCS, volume = "32", number = "3", pages = "9:1--9:??", month = sep, year = "2014", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2644808", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 21 07:18:28 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Memories today expose an all-or-nothing correctness model that incurs significant costs in performance, energy, area, and design complexity. But not all applications need high-precision storage for all of their data structures all of the time. This article proposes mechanisms that enable applications to store data approximately and shows that doing so can improve the performance, lifetime, or density of solid-state memories. We propose two mechanisms. The first allows errors in multilevel cells by reducing the number of programming pulses used to write them. The second mechanism mitigates wear-out failures and extends memory endurance by mapping approximate data onto blocks that have exhausted their hardware error correction resources. 
Simulations show that reduced-precision writes in multilevel phase-change memory cells can be 1.7 $ \times $ faster on average and using failed blocks can improve array lifetime by 23\% on average with quality loss under 10\%.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Clements:2015:SCR, author = "Austin T. Clements and M. Frans Kaashoek and Nickolai Zeldovich and Robert T. Morris and Eddie Kohler", title = "The Scalable Commutativity Rule: Designing Scalable Software for Multicore Processors", journal = j-TOCS, volume = "32", number = "4", pages = "10:1--10:??", month = jan, year = "2015", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2699681", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 21 07:18:30 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "What opportunities for multicore scalability are latent in software interfaces, such as system call APIs? Can scalability challenges and opportunities be identified even before any implementation exists, simply by considering interface specifications? To answer these questions, we introduce the scalable commutativity rule: whenever interface operations commute, they can be implemented in a way that scales. This rule is useful throughout the development process for scalable multicore software, from the interface design through implementation, testing, and evaluation. This article formalizes the scalable commutativity rule. This requires defining a novel form of commutativity, SIM commutativity, that lets the rule apply even to complex and highly stateful software interfaces. We also introduce a suite of software development tools based on the rule. 
Our Commuter tool accepts high-level interface models, generates tests of interface operations that commute and hence could scale, and uses these tests to systematically evaluate the scalability of implementations. We apply Commuter to a model of 18 POSIX file and virtual memory system operations. Using the resulting 26,238 scalability tests, Commuter highlights Linux kernel problems previously observed to limit application scalability and identifies previously unknown bottlenecks that may be triggered by future workloads or hardware. Finally, we apply the scalable commutativity rule and Commuter to the design and implementation of sv6, a new POSIX-like operating system. sv6's novel file and virtual memory system designs enable it to scale for 99\% of the tests generated by Commuter. These results translate to linear scalability on an 80-core x86 machine for applications built on sv6's commutative operations.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Nair:2015:MMA, author = "Arun Arvind Nair and Stijn Eyerman and Jian Chen and Lizy Kurian John and Lieven Eeckhout", title = "Mechanistic Modeling of Architectural Vulnerability Factor", journal = j-TOCS, volume = "32", number = "4", pages = "11:1--11:??", month = jan, year = "2015", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2669364", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 21 07:18:30 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Reliability to soft errors is a significant design challenge in modern microprocessors owing to an exponential increase in the number of transistors on chip and the reduction in operating voltages with each process generation. 
Architectural Vulnerability Factor (AVF) modeling using microarchitectural simulators enables architects to make informed performance, power, and reliability tradeoffs. However, such simulators are time-consuming and do not reveal the microarchitectural mechanisms that influence AVF. In this article, we present an accurate first-order mechanistic analytical model to compute AVF, developed using the first principles of an out-of-order superscalar execution. This model provides insight into the fundamental interactions between the workload and microarchitecture that together influence AVF. We use the model to perform design space exploration, parametric sweeps, and workload characterization for AVF.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Aublin:2015:NBP, author = "Pierre-Louis Aublin and Rachid Guerraoui and Nikola Knezevi{\'c} and Vivien Qu{\'e}ma and Marko Vukoli{\'c}", title = "The Next 700 {BFT} Protocols", journal = j-TOCS, volume = "32", number = "4", pages = "12:1--12:??", month = jan, year = "2015", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2658994", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 21 07:18:30 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "We present Abstract (ABortable STate mAChine replicaTion), a new abstraction for designing and reconfiguring generalized replicated state machines that are, unlike traditional state machines, allowed to abort executing a client's request if ``something goes wrong.'' Abstract can be used to considerably simplify the incremental development of efficient Byzantine fault-tolerant state machine replication (BFT) protocols that are notorious for being difficult to develop. In short, we treat a BFT protocol as a composition of Abstract instances. 
Each instance is developed and analyzed independently and optimized for specific system conditions. We illustrate the power of Abstract through several interesting examples. We first show how Abstract can yield benefits of a state-of-the-art BFT protocol in a less painful and error-prone manner. Namely, we develop AZyzzyva, a new protocol that mimics the celebrated best-case behavior of Zyzzyva using less than 35\% of the Zyzzyva code. To cover worst-case situations, our abstraction enables one to use in AZyzzyva any existing BFT protocol. We then present Aliph, a new BFT protocol that outperforms previous BFT protocols in terms of both latency (by up to 360\%) and throughput (by up to 30\%). Finally, we present R-Aliph, an implementation of Aliph that is robust, that is, whose performance degrades gracefully in the presence of Byzantine replicas and Byzantine clients.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Wang:2015:DAU, author = "Xi Wang and Nickolai Zeldovich and M. Frans Kaashoek and Armando Solar-Lezama", title = "A Differential Approach to Undefined Behavior Detection", journal = j-TOCS, volume = "33", number = "1", pages = "1:1--1:??", month = mar, year = "2015", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2699678", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Mar 13 07:03:25 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "This article studies undefined behavior arising in systems programming languages such as C/C++. Undefined behavior bugs lead to unpredictable and subtle systems behavior, and their effects can be further amplified by compiler optimizations. Undefined behavior bugs are present in many systems, including the Linux kernel and the Postgres database. 
The consequences range from incorrect functionality to missing security checks. This article proposes a formal and practical approach that finds undefined behavior bugs by finding ``unstable code'' in terms of optimizations that leverage undefined behavior. Using this approach, we introduce a new static checker called Stack that precisely identifies undefined behavior bugs. Applying Stack to widely used systems has uncovered 161 new bugs that have been confirmed and fixed by developers.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Bila:2015:EOP, author = "Nilton Bila and Eric J. Wright and Eyal {De Lara} and Kaustubh Joshi and H. Andr{\'e}s Lagar-Cavilla and Eunbyung Park and Ashvin Goel and Matti Hiltunen and Mahadev Satyanarayanan", title = "Energy-Oriented Partial Desktop Virtual Machine Migration", journal = j-TOCS, volume = "33", number = "1", pages = "2:1--2:??", month = mar, year = "2015", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2699683", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Mar 13 07:03:25 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Modern offices are crowded with personal computers. While studies have shown these to be idle most of the time, they remain powered, consuming up to 60\% of their peak power. Hardware-based solutions engendered by PC vendors (e.g., low-power states, Wake-on-LAN) have proved unsuccessful because, in spite of user inactivity, these machines often need to remain network active in support of background applications that maintain network presence. Recent proposals have advocated the use of consolidation of idle desktop Virtual Machines (VMs). 
However, desktop VMs are often large, requiring gigabytes of memory. Consolidating such VMs creates large network transfers lasting in the order of minutes and utilizes server memory inefficiently. When multiple VMs migrate concurrently, networks become congested, and the resulting migration latencies are prohibitive. We present partial VM migration, an approach that transparently migrates only the working set of an idle VM. It creates a partial replica of the desktop VM on the consolidation server by copying only VM metadata, and it transfers pages to the server on-demand, as the VM accesses them. This approach places desktop PCs in low-power mode when inactive and switches them to running mode when pages are needed by the VM running on the consolidation server. To ensure that desktops save energy, we have developed sleep scheduling and prefetching algorithms, as well as the context-aware selective resume framework, a novel approach to reduce the latency of power mode transition operations in commodity PCs. 
Jettison, our software prototype of partial VM migration for off-the-shelf PCs, can deliver 44--91\% energy savings during idle periods of at least 10 minutes, while providing low migration latencies of about 4 seconds and migrating minimal state that is under an order of magnitude of the VM's memory footprint.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Blem:2015:IWU, author = "Emily Blem and Jaikrishnan Menon and Thiruvengadam Vijayaraghavan and Karthikeyan Sankaralingam", title = "{ISA} Wars: Understanding the Relevance of {ISA} being {RISC} or {CISC} to Performance, Power, and Energy on Modern Architectures", journal = j-TOCS, volume = "33", number = "1", pages = "3:1--3:??", month = mar, year = "2015", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2699682", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Mar 13 07:03:25 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "RISC versus CISC wars raged in the 1980s when chip area and processor design complexity were the primary constraints and desktops and servers exclusively dominated the computing landscape. Today, energy and power are the primary design constraints and the computing landscape is significantly different: Growth in tablets and smartphones running ARM (a RISC ISA) is surpassing that of desktops and laptops running x86 (a CISC ISA). Furthermore, the traditionally low-power ARM ISA is entering the high-performance server market, while the traditionally high-performance x86 ISA is entering the mobile low-power device market. 
Thus, the question of whether ISA plays an intrinsic role in performance or energy efficiency is becoming important again, and we seek to answer this question through a detailed measurement-based study on real hardware running real applications. We analyze measurements on seven platforms spanning three ISAs (MIPS, ARM, and x86) over workloads spanning mobile, desktop, and server computing. Our methodical investigation demonstrates the role of ISA in modern microprocessors' performance and energy efficiency. We find that ARM, MIPS, and x86 processors are simply engineering design points optimized for different levels of performance, and there is nothing fundamentally more energy efficient in one ISA class or the other. The ISA being RISC or CISC seems irrelevant.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Lin:2015:KMO, author = "Felix Xiaozhu Lin and Zhen Wang and Lin Zhong", title = "{K2}: a Mobile Operating System for Heterogeneous Coherence Domains", journal = j-TOCS, volume = "33", number = "2", pages = "4:1--4:??", month = jun, year = "2015", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2699676", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jun 10 11:00:03 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Mobile System-on-Chips (SoC) that incorporate heterogeneous coherence domains promise high energy efficiency to a wide range of mobile applications, yet are difficult to program. To exploit the architecture, a desirable, yet missing capability is to replicate operating system (OS) services over multiple coherence domains with minimum inter-domain communication. 
In designing such an OS, we set three goals: to ease application development, to simplify OS engineering, and to preserve the current OS performance. To this end, we identify a shared-most OS model for multiple coherence domains: creating per-domain instances of core OS services with no shared state, while enabling other extended OS services to share state across domains. To test the model, we build K2, a prototype OS on the TI OMAP4 SoC, by reusing most of the Linux 3.4 source. K2 presents a single system image to applications with its two kernels running on top of the two coherence domains of OMAP4. The two kernels have independent instances of core OS services, such as page allocation and interrupt management, as coordinated by K2; the two kernels share most extended OS services, such as device drivers, whose state is kept coherent transparently by K2. Despite platform constraints and unoptimized code, K2 improves energy efficiency for light OS workloads by 8x--10x, while incurring less than 9\% performance overhead for two device drivers shared between kernels. Our experiences with K2 show that the shared-most model is promising.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Johansen:2015:FSS, author = "H{\aa}vard D. 
Johansen and Robbert {Van Renesse} and Ymir Vigfusson and Dag Johansen", title = "{Fireflies}: a Secure and Scalable Membership and Gossip Service", journal = j-TOCS, volume = "33", number = "2", pages = "5:1--5:??", month = jun, year = "2015", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2701418", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jun 10 11:00:03 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "An attacker who controls a computer in an overlay network can effectively control the entire overlay network if the mechanism managing membership information can successfully be targeted. This article describes Fireflies, an overlay network protocol that fights such attacks by organizing members in a verifiable pseudorandom structure so that an intruder cannot incorrectly modify the membership views of correct members. Fireflies provides each member with a view of the entire membership, and supports networks with moderate total churn. 
We evaluate Fireflies using both simulations and PlanetLab to show that Fireflies is a practical approach for secure membership maintenance in such networks.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Chen:2015:SFA, author = "Tianshi Chen and Shijin Zhang and Shaoli Liu and Zidong Du and Tao Luo and Yuan Gao and Junjie Liu and Dongsheng Wang and Chengyong Wu and Ninghui Sun and Yunji Chen and Olivier Temam", title = "A Small-Footprint Accelerator for Large-Scale Neural Networks", journal = j-TOCS, volume = "33", number = "2", pages = "6:1--6:??", month = jun, year = "2015", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2701417", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jun 10 11:00:03 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Machine-learning tasks are becoming pervasive in a broad range of domains, and in a broad range of systems (from embedded systems to data centers). At the same time, a small set of machine-learning algorithms (especially Convolutional and Deep Neural Networks, i.e., CNNs and DNNs) are proving to be state-of-the-art across many applications. As architectures evolve toward heterogeneous multicores composed of a mix of cores and accelerators, a machine-learning accelerator can achieve the rare combination of efficiency (due to the small number of target algorithms) and broad application scope. Until now, most machine-learning accelerator designs have been focusing on efficiently implementing the computational part of the algorithms. However, recent state-of-the-art CNNs and DNNs are characterized by their large size. In this study, we design an accelerator for large-scale CNNs and DNNs, with a special emphasis on the impact of memory on accelerator design, performance, and energy. 
We show that it is possible to design an accelerator with a high throughput, capable of performing 452 GOP/s (key NN operations such as synaptic weight multiplications and neurons outputs additions) in a small footprint of 3.02mm$^2$ and 485mW; compared to a 128-bit 2GHz SIMD processor, the accelerator is $ 117.87 \times $ faster, and it can reduce the total energy by $ 21.08 \times $. The accelerator characteristics are obtained after layout at 65nm. Such a high throughput in a small footprint can open up the usage of state-of-the-art machine-learning algorithms in a broad set of systems and for a broad set of applications.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Ousterhout:2015:RSS, author = "John Ousterhout and Arjun Gopalan and Ashish Gupta and Ankita Kejriwal and Collin Lee and Behnam Montazeri and Diego Ongaro and Seo Jin Park and Henry Qin and Mendel Rosenblum and Stephen Rumble and Ryan Stutsman and Stephen Yang", title = "The {RAMCloud} Storage System", journal = j-TOCS, volume = "33", number = "3", pages = "7:1--7:??", month = sep, year = "2015", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2806887", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Sep 14 10:11:30 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "RAMCloud is a storage system that provides low-latency access to large-scale datasets. To achieve low latency, RAMCloud stores all data in DRAM at all times. To support large capacities (1PB or more), it aggregates the memories of thousands of servers into a single coherent key-value store. RAMCloud ensures the durability of DRAM-based data by keeping backup copies on secondary storage. 
It uses a uniform log-structured mechanism to manage both DRAM and secondary storage, which results in high performance and efficient memory usage. RAMCloud uses a polling-based approach to communication, bypassing the kernel to communicate directly with NICs; with this approach, client applications can read small objects from any RAMCloud storage server in less than 5 $ \mu $s, durable writes of small objects take about 13.5 $ \mu $s. RAMCloud does not keep multiple copies of data online; instead, it provides high availability by recovering from crashes very quickly (1 to 2 seconds). RAMCloud's crash recovery mechanism harnesses the resources of the entire cluster working concurrently so that recovery performance scales with cluster size.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Baumann:2015:SAU, author = "Andrew Baumann and Marcus Peinado and Galen Hunt", title = "Shielding Applications from an Untrusted Cloud with {Haven}", journal = j-TOCS, volume = "33", number = "3", pages = "8:1--8:??", month = sep, year = "2015", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2799647", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Sep 14 10:11:30 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Today's cloud computing infrastructure requires substantial trust. Cloud users rely on both the provider's staff and its globally distributed software/hardware platform not to expose any of their private data. We introduce the notion of shielded execution, which protects the confidentiality and integrity of a program and its data from the platform on which it runs (i.e., the cloud operator's OS, VM, and firmware).
Our prototype, Haven, is the first system to achieve shielded execution of unmodified legacy applications, including SQL Server and Apache, on a commodity OS (Windows) and commodity hardware. Haven leverages the hardware protection of Intel SGX to defend against privileged code and physical attacks such as memory probes, and also addresses the dual challenges of executing unmodified legacy binaries and protecting them from a malicious host. This work motivated recent changes in the SGX specification.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Lee:2015:SSK, author = "Janghaeng Lee and Mehrzad Samadi and Yongjun Park and Scott Mahlke", title = "{SKMD}: Single Kernel on Multiple Devices for Transparent {CPU--GPU} Collaboration", journal = j-TOCS, volume = "33", number = "3", pages = "9:1--9:??", month = sep, year = "2015", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2798725", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Sep 14 10:11:30 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Heterogeneous computing on CPUs and GPUs has traditionally used fixed roles for each device: the GPU handles data parallel work by taking advantage of its massive number of cores while the CPU handles non data-parallel work, such as the sequential code or data transfer management. This work distribution can be a poor solution as it underutilizes the CPU, has difficulty generalizing beyond the single CPU-GPU combination, and may waste a large fraction of time transferring data. Further, CPUs are performance competitive with GPUs on many workloads, thus simply partitioning work based on the fixed roles may be a poor choice. 
In this article, we present the single-kernel multiple devices (SKMD) system, a framework that transparently orchestrates collaborative execution of a single data-parallel kernel across multiple asymmetric CPUs and GPUs. The programmer is responsible for developing a single data-parallel kernel in OpenCL, while the system automatically partitions the workload across an arbitrary set of devices, generates kernels to execute the partial workloads, and efficiently merges the partial outputs together. The goal is performance improvement by maximally utilizing all available resources to execute the kernel. SKMD handles the difficult challenges of exposed data transfer costs and the performance variations GPUs have with respect to input size. On real hardware, SKMD achieves an average speedup of 28\% on a system with one multicore CPU and two asymmetric GPUs compared to a fastest device execution strategy for a set of popular OpenCL kernels.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Pellauer:2015:ECC, author = "Michael Pellauer and Angshuman Parashar and Michael Adler and Bushra Ahsan and Randy Allmon and Neal Crago and Kermin Fleming and Mohit Gambhir and Aamer Jaleel and Tushar Krishna and Daniel Lustig and Stephen Maresh and Vladimir Pavlov and Rachid Rayess and Antonia Zhai and Joel Emer", title = "Efficient Control and Communication Paradigms for Coarse-Grained Spatial Architectures", journal = j-TOCS, volume = "33", number = "3", pages = "10:1--10:??", month = sep, year = "2015", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2754930", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Sep 14 10:11:30 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "There has been recent interest in exploring the acceleration of 
nonvectorizable workloads with spatially programmed architectures that are designed to efficiently exploit pipeline parallelism. Such an architecture faces two main problems: how to efficiently control each processing element (PE) in the system, and how to facilitate inter-PE communication without the overheads of traditional shared-memory coherent memory. In this article, we explore solving these problems using triggered instructions and latency-insensitive channels. Triggered instructions completely eliminate the program counter (PC) and allow programs to transition concisely between states without explicit branch instructions. Latency-insensitive channels allow efficient communication of inter-PE control information while simultaneously enabling flexible code placement and improving tolerance for variable events such as cache accesses. Together, these approaches provide a unified mechanism to avoid overserialized execution, essentially achieving the effect of techniques such as dynamic instruction reordering and multithreading. Our analysis shows that a spatial accelerator using triggered instructions and latency-insensitive channels can achieve 8 $ \times $ greater area-normalized performance than a traditional general-purpose processor. Further analysis shows that triggered control reduces the number of static and dynamic instructions in the critical paths by 62\% and 64\%, respectively, over a PC-style baseline, increasing the performance of the spatial programming approach by 2.0 $ \times $.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Peter:2016:AOS, author = "Simon Peter and Jialin Li and Irene Zhang and Dan R. K. 
Ports and Doug Woos and Arvind Krishnamurthy and Thomas Anderson and Timothy Roscoe", title = "{Arrakis}: The Operating System Is the Control Plane", journal = j-TOCS, volume = "33", number = "4", pages = "11:1--11:??", month = jan, year = "2016", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2812806", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 6 06:45:30 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Recent device hardware trends enable a new approach to the design of network server operating systems. In a traditional operating system, the kernel mediates access to device hardware by server applications to enforce process isolation as well as network and disk security. We have designed and implemented a new operating system, Arrakis, that splits the traditional role of the kernel in two. Applications have direct access to virtualized I/O devices, allowing most I/O operations to skip the kernel entirely, while the kernel is re-engineered to provide network and disk protection without kernel mediation of every operation. 
We describe the hardware and software changes needed to take advantage of this new abstraction, and we illustrate its power by showing improvements of 2 to 5 $ \times $ in latency and 9 $ \times $ throughput for a popular persistent NoSQL store relative to a well-tuned Linux implementation.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Kumar:2016:ASC, author = "Rakesh Kumar and Alejandro Mart{\'\i}nez and Antonio Gonz{\'a}lez", title = "Assisting Static Compiler Vectorization with a Speculative Dynamic Vectorizer in an {HW\slash SW} Codesigned Environment", journal = j-TOCS, volume = "33", number = "4", pages = "12:1--12:??", month = jan, year = "2016", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2807694", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 6 06:45:30 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Compiler-based static vectorization is used widely to extract data-level parallelism from computation-intensive applications. Static vectorization is very effective in vectorizing traditional array-based applications. However, compilers' inability to do accurate interprocedural pointer disambiguation and interprocedural array dependence analysis severely limits vectorization opportunities. HW/SW codesigned processors provide an excellent opportunity to optimize the applications at runtime. The availability of dynamic application behavior at runtime helps in capturing vectorization opportunities generally missed by the compilers. This article proposes to complement the static vectorization with a speculative dynamic vectorizer in an HW/SW codesigned processor. 
We present a speculative dynamic vectorization algorithm that speculatively reorders ambiguous memory references to uncover vectorization opportunities. The speculative reordering of memory instructions avoids the need for accurate interprocedural pointer disambiguation and interprocedural array dependence analysis. The hardware checks for any memory dependence violation due to speculative vectorization and takes corrective action in case of violation. Our experiments show that the combined (static + dynamic) vectorization approach provides a $ 2 \times $ performance benefit compared to the static GCC vectorization alone, for SPECFP2006. Furthermore, the speculative dynamic vectorizer is able to vectorize 48\% of the loops that ICC failed to vectorize due to conservative dependence analysis in the TSVC benchmark suite. Moreover, the dynamic vectorization scheme is as effective in vectorization of pointer-based applications as for the array-based ones, whereas compilers lose significant vectorization opportunities in pointer-based applications. 
Furthermore, we show that speculation is not only a luxury but also a necessity for runtime vectorization.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Lozi:2016:FPL, author = "Jean-Pierre Lozi and Florian David and Ga{\"e}l Thomas and Julia Lawall and Gilles Muller", title = "Fast and Portable Locking for Multicore Architectures", journal = j-TOCS, volume = "33", number = "4", pages = "13:1--13:??", month = jan, year = "2016", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2845079", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Jan 6 06:45:30 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "The scalability of multithreaded applications on current multicore systems is hampered by the performance of lock algorithms, due to the costs of access contention and cache misses. The main contribution presented in this article is a new locking technique, Remote Core Locking (RCL), that aims to accelerate the execution of critical sections in legacy applications on multicore architectures. The idea of RCL is to replace lock acquisitions by optimized remote procedure calls to a dedicated server hardware thread. RCL limits the performance collapse observed with other lock algorithms when many threads try to acquire a lock concurrently and removes the need to transfer lock-protected shared data to the hardware thread acquiring the lock, because such data can typically remain in the server's cache. Other contributions presented in this article include a profiler that identifies the locks that are the bottlenecks in multithreaded applications and that can thus benefit from RCL, and a reengineering tool that transforms POSIX lock acquisitions into RCL locks. 
Eighteen applications were used to evaluate RCL: the nine applications of the SPLASH-2 benchmark suite, the seven applications of the Phoenix 2 benchmark suite, Memcached, and Berkeley DB with a TPC-C client. Eight of these applications are unable to scale because of locks and benefit from RCL on an x86 machine with four AMD Opteron processors and 48 hardware threads. By using RCL instead of Linux POSIX locks, performance is improved by up to 2.5 times on Memcached, and up to 11.6 times on Berkeley DB with the TPC-C client. On a SPARC machine with two Sun UltraSPARC T2+ processors and 128 hardware threads, three applications benefit from RCL. In particular, performance is improved by up to 1.3 times with respect to Solaris POSIX locks on Memcached, and up to 7.9 times on Berkeley DB with the TPC-C client.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Heiser:2016:LML, author = "Gernot Heiser and Kevin Elphinstone", title = "{L4} Microkernels: The Lessons from 20 Years of Research and Deployment", journal = j-TOCS, volume = "34", number = "1", pages = "1:1--1:29", month = apr, year = "2016", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2893177", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat May 21 08:09:53 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "The L4 microkernel has undergone 20 years of use and evolution. It has an active user and developer community, and there are commercial versions that are deployed on a large scale and in safety-critical systems. In this article we examine the lessons learnt in those 20 years about microkernel design and implementation. 
We revisit the L4 design articles and examine the evolution of design and implementation from the original L4 to the latest generation of L4 kernels. We specifically look at seL4, which has pushed the L4 model furthest and was the first OS kernel to undergo a complete formal verification of its implementation as well as a sound analysis of worst-case execution times. We demonstrate that while much has changed, the fundamental principles of minimality, generality, and high inter-process communication (IPC) performance remain the main drivers of design and implementation decisions.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Hauswald:2016:DFW, author = "Johann Hauswald and Michael A. Laurenzano and Yunqi Zhang and Hailong Yang and Yiping Kang and Cheng Li and Austin Rovinski and Arjun Khurana and Ronald G. Dreslinski and Trevor Mudge and Vinicius Petrucci and Lingjia Tang and Jason Mars", title = "Designing Future Warehouse-Scale Computers for {Sirius}, an End-to-End Voice and Vision Personal Assistant", journal = j-TOCS, volume = "34", number = "1", pages = "2:1--2:??", month = apr, year = "2016", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2870631", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat May 21 08:09:53 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "As user demand scales for intelligent personal assistants (IPAs) such as Apple's Siri, Google's Google Now, and Microsoft's Cortana, we are approaching the computational limits of current datacenter (DC) architectures. It is an open question how future server architectures should evolve to enable this emerging class of applications, and the lack of an open-source IPA workload is an obstacle in addressing this question. 
In this article, we present the design of Sirius, an open end-to-end IPA Web-service application that accepts queries in the form of voice and images, and responds with natural language. We then use this workload to investigate the implications of four points in the design space of future accelerator-based server architectures spanning traditional CPUs, GPUs, manycore throughput co-processors, and FPGAs. To investigate future server designs for Sirius, we decompose Sirius into a suite of eight benchmarks (Sirius Suite) comprising the computationally intensive bottlenecks of Sirius. We port Sirius Suite to a spectrum of accelerator platforms and use the performance and power trade-offs across these platforms to perform a total cost of ownership (TCO) analysis of various server design points. In our study, we find that accelerators are critical for the future scalability of IPA services. Our results show that GPU- and FPGA-accelerated servers improve the query latency on average by 8.5$ \times $ and 15$ \times $, respectively. 
For a given throughput, GPU- and FPGA-accelerated servers can reduce the TCO of DCs by 2.3$ \times $ and 1.3$ \times $, respectively.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Badamo:2016:IPE, author = "Michael Badamo and Jeff Casarona and Minshu Zhao and Donald Yeung", title = "Identifying Power-Efficient Multicore Cache Hierarchies via Reuse Distance Analysis", journal = j-TOCS, volume = "34", number = "1", pages = "3:1--3:??", month = apr, year = "2016", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2851503", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat May 21 08:09:53 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "To enable performance improvements in a power-efficient manner, computer architects have been building CPUs that exploit greater amounts of thread-level parallelism. A key consideration in such CPUs is properly designing the on-chip cache hierarchy. Unfortunately, this can be hard to do, especially for CPUs with high core counts and large amounts of cache. The enormous design space formed by the combinatorial number of ways in which to organize the cache hierarchy makes it difficult to identify power-efficient configurations. Moreover, the problem is exacerbated by the slow speed of architectural simulation, which is the primary means for conducting such design space studies. A powerful tool that can help architects optimize CPU cache hierarchies is reuse distance (RD) analysis. Recent work has extended uniprocessor RD techniques---i.e., by introducing concurrent RD and private-stack RD profiling---to enable analysis of different types of caches in multicore CPUs.
Once acquired, parallel locality profiles can predict the performance of numerous cache configurations, permitting highly efficient design space exploration. To date, existing work on multicore RD analysis has focused on developing the profiling techniques and assessing their accuracy. Unfortunately, there has been no work on using RD analysis to optimize CPU performance or power consumption. This article investigates applying multicore RD analysis to identify the most power efficient cache configurations for a multicore CPU. First, we develop analytical models that use the cache-miss counts from parallel locality profiles to estimate CPU performance and power consumption. Although future scalable CPUs will likely employ multithreaded (and even out-of-order) cores, our current study assumes single-threaded in-order cores to simplify the models, allowing us to focus on the cache hierarchy and our RD-based techniques. Second, to demonstrate the utility of our techniques, we apply our models to optimize a large-scale tiled CPU architecture with a two-level cache hierarchy. We show that the most power efficient configuration varies considerably across different benchmarks, and that our locality profiles provide deep insights into why certain configurations are power efficient. We also show that picking the best configuration can provide significant gains, as there is a 2.01x power efficiency spread across our tiled CPU design space. Finally, we validate the accuracy of our techniques using detailed simulation. Among several simulated configurations, our techniques can usually pick the most power efficient configuration, or one that is very close to the best. 
In addition, across all simulated configurations, we can predict power efficiency with 15.2\% error.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Perais:2016:ECS, author = "Arthur Perais and Andr{\'e} Seznec", title = "{EOLE}: Combining Static and Dynamic Scheduling Through Value Prediction to Reduce Complexity and Increase Performance", journal = j-TOCS, volume = "34", number = "2", pages = "4:1--4:??", month = may, year = "2016", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2870632", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat May 21 08:09:53 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Recent work in the field of value prediction (VP) has shown that given an efficient confidence estimation mechanism, prediction validation could be removed from the out-of-order engine and delayed until commit time. As a result, a simple recovery mechanism---pipeline squashing---can be used, whereas the out-of-order engine remains mostly unmodified. Yet, VP and validation at commit time require additional ports on the physical register file, potentially rendering the overall number of ports unbearable. Fortunately, VP also implies that many single-cycle ALU instructions have their operands predicted in the front-end and can be executed in-place, in-order. Similarly, the execution of single-cycle instructions whose result has been predicted can be delayed until commit time since predictions are validated at commit time. Consequently, a significant number of instructions---10\% to 70\% in our experiments---can bypass the out-of-order engine, allowing for a reduction of the issue width. This reduction paves the way for a truly practical implementation of VP.
Furthermore, since VP in itself usually increases performance, our resulting {Early-Out-of-Order-Late} Execution architecture, EOLE, is often more efficient than a baseline VP-augmented 6-issue superscalar while having a significantly narrower 4-issue out-of-order engine.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Li:2016:FSA, author = "Sheng Li and Hyeontaek Lim and Victor W. Lee and Jung Ho Ahn and Anuj Kalia and Michael Kaminsky and David G. Andersen and Seongil O and Sukhan Lee and Pradeep Dubey", title = "Full-Stack Architecting to Achieve a Billion-Requests-Per-Second Throughput on a Single Key--Value Store Server Platform", journal = j-TOCS, volume = "34", number = "2", pages = "5:1--5:??", month = may, year = "2016", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2897393", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat May 21 08:09:53 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Distributed in-memory key-value stores (KVSs), such as memcached, have become a critical data serving layer in modern Internet-oriented data center infrastructure. Their performance and efficiency directly affect the QoS of web services and the efficiency of data centers. Traditionally, these systems have had significant overheads from inefficient network processing, OS kernel involvement, and concurrency control. Two recent research thrusts have focused on improving key-value performance. Hardware-centric research has started to explore specialized platforms including FPGAs for KVSs; results demonstrated an order of magnitude increase in throughput and energy efficiency over stock memcached.
Software-centric research revisited the KVS application to address fundamental software bottlenecks and to exploit the full potential of modern commodity hardware; these efforts also showed orders of magnitude improvement over stock memcached. We aim at architecting high-performance and efficient KVS platforms, and start with a rigorous architectural characterization across system stacks over a collection of representative KVS implementations. Our detailed full-system characterization not only identifies the critical hardware/software ingredients for high-performance KVS systems but also leads to guided optimizations atop a recent design to achieve a record-setting throughput of 120 million requests per second (MRPS) (167MRPS with client-side batching) on a single commodity server. Our system delivers the best performance and energy efficiency (RPS/watt) demonstrated to date over existing KVSs including the best-published FPGA-based and GPU-based claims. We craft a set of design principles for future platform architectures, and via detailed simulations demonstrate the capability of achieving a billion RPS with a single server constructed following our principles.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Lo:2016:IRE, author = "David Lo and Liqun Cheng and Rama Govindaraju and Parthasarathy Ranganathan and Christos Kozyrakis", title = "Improving Resource Efficiency at Scale with {Heracles}", journal = j-TOCS, volume = "34", number = "2", pages = "6:1--6:??", month = may, year = "2016", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2882783", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat May 21 08:09:53 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "User-facing, latency-sensitive services, such as websearch, 
underutilize their computing resources during daily periods of low traffic. Reusing those resources for other tasks is rarely done in production services since the contention for shared resources can cause latency spikes that violate the service-level objectives of latency-sensitive tasks. The resulting under-utilization hurts both the affordability and energy efficiency of large-scale datacenters. With the slowdown in technology scaling caused by the sunsetting of Moore's law, it becomes important to address this opportunity. We present Heracles, a feedback-based controller that enables the safe colocation of best-effort tasks alongside a latency-critical service. Heracles dynamically manages multiple hardware and software isolation mechanisms, such as CPU, memory, and network isolation, to ensure that the latency-sensitive job meets latency targets while maximizing the resources given to best-effort tasks. We evaluate Heracles using production latency-critical and batch workloads from Google and demonstrate average server utilizations of 90\% without latency violations across all the load and colocation scenarios that we evaluated.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Jun:2016:BDF, author = "Sang-Woo Jun and Ming Liu and Sungjin Lee and Jamey Hicks and John Ankcorn and Myron King and Shuotao Xu and Arvind", title = "{BlueDBM}: Distributed Flash Storage for Big Data Analytics", journal = j-TOCS, volume = "34", number = "3", pages = "7:1--7:??", month = sep, year = "2016", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2898996", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Sep 17 16:09:15 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Complex data queries, because of their need for random accesses, have 
proven to be slow unless all the data can be accommodated in DRAM. There are many domains, such as genomics, geological data, and daily Twitter feeds, where the datasets of interest are 5TB to 20TB. For such a dataset, one would need a cluster with 100 servers, each with 128GB to 256GB of DRAM, to accommodate all the data in DRAM. On the other hand, such datasets could be stored easily in the flash memory of a rack-sized cluster. Flash storage has much better random access performance than hard disks, which makes it desirable for analytics workloads. However, currently available off-the-shelf flash storage packaged as SSDs does not make effective use of flash storage because it incurs a great amount of additional overhead during flash device management and network access. In this article, we present BlueDBM, a new system architecture that has flash-based storage with in-store processing capability and a low-latency high-throughput intercontroller network between storage devices. We show that BlueDBM outperforms a flash-based system without these features by a factor of 10 for some important applications. While the performance of a DRAM-centric system falls sharply even if only 5\% to 10\% of the references are to secondary storage, this sharp performance degradation is not an issue in BlueDBM. 
BlueDBM presents an attractive point in the cost/performance tradeoff for Big Data analytics.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{West:2016:VSK, author = "Richard West and Ye Li and Eric Missimer and Matthew Danish", title = "A Virtualized Separation Kernel for Mixed-Criticality Systems", journal = j-TOCS, volume = "34", number = "3", pages = "8:1--8:??", month = sep, year = "2016", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2935748", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Sep 17 16:09:15 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Multi- and many-core processors are becoming increasingly popular in embedded systems. Many of these processors now feature hardware virtualization capabilities, as found on the ARM Cortex A15 and x86 architectures with Intel VT-x or AMD-V support. Hardware virtualization provides a way to partition physical resources, including processor cores, memory, and I/O devices, among guest virtual machines (VMs). Each VM is then able to host tasks of a specific criticality level, as part of a mixed-criticality system with different timing and safety requirements. However, traditional virtual machine systems are inappropriate for mixed-criticality computing. They use hypervisors to schedule separate VMs on physical processor cores. The costs of trapping into hypervisors to multiplex and manage machine physical resources on behalf of separate guests are too expensive for many time-critical tasks. Additionally, traditional hypervisors have memory footprints that are often too large for many embedded computing systems. 
In this article, we discuss the design of the Quest-V separation kernel, which partitions services of different criticality levels across separate VMs, or sandboxes. Each sandbox encapsulates a subset of machine physical resources that it manages without requiring intervention from a hypervisor. In Quest-V, a hypervisor is only needed to bootstrap the system, recover from certain faults, and establish communication channels between sandboxes. This not only reduces the memory footprint of the most privileged protection domain but also removes it from the control path during normal system operation, thereby heightening security.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Silberstein:2016:GNA, author = "Mark Silberstein and Sangman Kim and Seonggu Huh and Xinya Zhang and Yige Hu and Amir Wated and Emmett Witchel", title = "{GPUnet}: Networking Abstractions for {GPU} Programs", journal = j-TOCS, volume = "34", number = "3", pages = "9:1--9:??", month = sep, year = "2016", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2963098", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Sep 17 16:09:15 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Despite the popularity of GPUs in high-performance and scientific computing, and despite increasingly general-purpose hardware capabilities, the use of GPUs in network servers or distributed systems poses significant challenges. GPUnet is a native GPU networking layer that provides a socket abstraction and high-level networking APIs for GPU programs. 
We use GPUnet to streamline the development of high-performance, distributed applications like in-GPU-memory MapReduce and a new class of low-latency, high-throughput GPU-native network services such as a face verification server.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Zheng:2017:RAS, author = "Mai Zheng and Joseph Tucek and Feng Qin and Mark Lillibridge and Bill W. Zhao and Elizabeth S. Yang", title = "Reliability Analysis of {SSDs} Under Power Fault", journal = j-TOCS, volume = "34", number = "4", pages = "10:1--10:??", month = jan, year = "2017", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2992782", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jul 24 09:40:46 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Modern storage technology (solid-state disks (SSDs), NoSQL databases, commoditized RAID hardware, etc.) brings new reliability challenges to the already-complicated storage stack. Among other things, the behavior of these new components during power faults---which happen relatively frequently in data centers---is an important yet mostly ignored issue in this dependability-critical area. Understanding how new storage components behave under power fault is the first step towards designing new robust storage systems. In this article, we propose a new methodology to expose reliability issues in block devices under power faults. Our framework includes specially designed hardware to inject power faults directly to devices, workloads to stress storage components, and techniques to detect various types of failures. Applying our testing framework, we test 17 commodity SSDs from six different vendors using more than three thousand fault injection cycles in total.
Our experimental results reveal that 14 of the 17 tested SSD devices exhibit surprising failure behaviors under power faults, including bit corruption, shorn writes, unserializable writes, metadata corruption, and total device failure.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Belay:2017:IOS, author = "Adam Belay and George Prekas and Mia Primorac and Ana Klimovic and Samuel Grossman and Christos Kozyrakis and Edouard Bugnion", title = "The {IX} Operating System: Combining Low Latency, High Throughput, and Efficiency in a Protected Dataplane", journal = j-TOCS, volume = "34", number = "4", pages = "11:1--11:??", month = jan, year = "2017", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/2997641", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jul 24 09:40:46 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "See correction \cite{Belay:2017:CIO}.", abstract = "The conventional wisdom is that aggressive networking requirements, such as high packet rates for small messages and $ \mu $s-scale tail latency, are best addressed outside the kernel, in a user-level networking stack. We present IX, a dataplane operating system that provides high I/O performance and high resource efficiency while maintaining the protection and isolation benefits of existing kernels. IX uses hardware virtualization to separate management and scheduling functions of the kernel (control plane) from network processing (dataplane).
The dataplane architecture builds upon a native, zero-copy API and optimizes for both bandwidth and latency by dedicating hardware threads and networking queues to dataplane instances, processing bounded batches of packets to completion, and eliminating coherence traffic and multicore synchronization. The control plane dynamically adjusts core allocations and voltage/frequency settings to meet service-level objectives. We demonstrate that IX outperforms Linux and a user-space network stack significantly in both throughput and end-to-end latency. Moreover, IX improves the throughput of a widely deployed, key-value store by up to $ 6.4 \times $ and reduces tail latency by more than $ 2 \times $. With three varying load patterns, the control plane saves 46\%--54\% of processor energy, and it allows background jobs to run at 35\%--47\% of their standalone throughput.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Zahedi:2017:CSA, author = "Seyed Majid Zahedi and Songchun Fan and Matthew Faw and Elijah Cole and Benjamin C. Lee", title = "Computational Sprinting: Architecture, Dynamics, and Strategies", journal = j-TOCS, volume = "34", number = "4", pages = "12:1--12:??", month = jan, year = "2017", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3014428", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jul 24 09:40:46 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Computational sprinting is a class of mechanisms that boost performance but dissipate additional power. We describe a sprinting architecture in which many, independent chip multiprocessors share a power supply and sprints are constrained by the chips' thermal limits and the rack's power limits.
Moreover, we present the computational sprinting game, a multi-agent perspective on managing sprints. Strategic agents decide whether to sprint based on application phases and system conditions. The game produces an equilibrium that improves task throughput for data analytics workloads by 4--6$ \times $ over prior greedy heuristics and performs within 90\% of an upper bound on throughput from a globally optimized policy.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Zhu:2017:OGP, author = "Yuhao Zhu and Vijay Janapa Reddi", title = "Optimizing General-Purpose {CPUs} for Energy-Efficient Mobile {Web} Computing", journal = j-TOCS, volume = "35", number = "1", pages = "1:1--1:??", month = jul, year = "2017", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3041024", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jul 24 09:40:47 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Mobile applications are increasingly being built using web technologies as a common substrate to achieve portability and to improve developer productivity. Unfortunately, web applications often incur large performance overhead, directly affecting the user quality-of-service (QoS) experience. Traditional techniques in improving mobile processor performance have mostly been adopting desktop-like design techniques such as increasing single-core microarchitecture complexity and aggressively integrating more cores. However, such a desktop-oriented strategy is likely coming to an end due to the stringent energy and thermal constraints that mobile devices impose. Therefore, we must pivot away from traditional mobile processor design techniques in order to provide sustainable performance improvement while maintaining energy efficiency. 
In this article, we propose to combine hardware customization and specialization techniques to improve the performance and energy efficiency of mobile web applications. We first perform design-space exploration (DSE) and identify opportunities in customizing existing general-purpose mobile processors, that is, tuning microarchitecture parameters. The thorough DSE also lets us discover sources of energy inefficiency in customized general-purpose architectures. To mitigate these inefficiencies, we propose, synthesize, and evaluate two new domain-specific specializations, called the Style Resolution Unit and the Browser Engine Cache. Our optimizations boost performance and energy efficiency at the same time while maintaining general-purpose programmability. As emerging mobile workloads increasingly rely more on web technologies, the type of optimizations we propose will become important in the future and are likely to have a long-lasting and widespread impact.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Hsu:2017:RLT, author = "Chang-Hong Hsu and Yunqi Zhang and Michael A. Laurenzano and David Meisner and Thomas Wenisch and Ronald G. 
Dreslinski and Jason Mars and Lingjia Tang", title = "Reining in Long Tails in Warehouse-Scale Computers with Quick Voltage Boosting Using Adrenaline", journal = j-TOCS, volume = "35", number = "1", pages = "2:1--2:??", month = jul, year = "2017", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3054742", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jul 24 09:40:47 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Reducing the long tail of the query latency distribution in modern warehouse scale computers is critical for improving performance and quality of service (QoS) of workloads such as Web Search and Memcached. Traditional turbo boost increases a processor's voltage and frequency during a coarse-grained sliding window, boosting all queries that are processed during that window. However, the inability of such a technique to pinpoint tail queries for boosting limits its tail reduction benefit. In this work, we propose Adrenaline, an approach to leverage finer-granularity (tens of nanoseconds) voltage boosting to effectively rein in the tail latency with query-level precision. Two key insights underlie this work. First, emerging finer granularity voltage/frequency boosting is an enabling mechanism for intelligent allocation of the power budget to precisely boost only the queries that contribute to the tail latency; second, per-query characteristics can be used to design indicators for proactively pinpointing these queries, triggering boosting accordingly. Based on these insights, Adrenaline effectively pinpoints and boosts queries that are likely to increase the tail distribution and can reap more benefit from the voltage/frequency boost. By evaluating under various workload configurations, we demonstrate the effectiveness of our methodology. 
We achieve up to a 2.50 $ \times $ tail latency improvement for Memcached and up to a 3.03 $ \times $ for Web Search over coarse-grained dynamic voltage and frequency scaling (DVFS) given a fixed boosting power budget. When optimizing for energy reduction, Adrenaline achieves up to a 1.81 $ \times $ improvement for Memcached and up to a 1.99 $ \times $ for Web Search over coarse-grained DVFS. By using the carefully chosen boost thresholds, Adrenaline further improves the tail latency reduction to 4.82 $ \times $ over coarse-grained DVFS.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Chen:2017:FMT, author = "Haibo Chen and Rong Chen and Xingda Wei and Jiaxin Shi and Yanzhe Chen and Zhaoguo Wang and Binyu Zang and Haibing Guan", title = "Fast In-Memory Transaction Processing Using {RDMA} and {HTM}", journal = j-TOCS, volume = "35", number = "1", pages = "3:1--3:??", month = jul, year = "2017", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3092701", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Jul 24 09:40:47 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "DrTM is a fast in-memory transaction processing system that exploits advanced hardware features such as remote direct memory access (RDMA) and hardware transactional memory (HTM). To achieve high efficiency, it mostly offloads concurrency control such as tracking read/write accesses and conflict detection into HTM in a local machine and leverages the strong consistency between RDMA and HTM to ensure serializability among concurrent transactions across machines. 
To mitigate the high probability of HTM aborts for large transactions, we design and implement an optimized transaction chopping algorithm to decompose a set of large transactions into smaller pieces such that HTM is only required to protect each piece. We further build an efficient hash table for DrTM by leveraging HTM and RDMA to simplify the design and notably improve the performance. We describe how DrTM supports common database features like read-only transactions and logging for durability. Evaluation using typical OLTP workloads including TPC-C and SmallBank shows that DrTM has better single-node efficiency and scales well on a six-node cluster; it achieves greater than 1.51, 34 and 5.24, 138 million transactions per second for TPC-C and SmallBank on a single node and the cluster, respectively. Such numbers outperform a state-of-the-art single-node system (i.e., Silo) and a distributed transaction system (i.e., Calvin) by at least 1.9X and 29.6X for TPC-C.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Zhao:2017:UMR, author = "Minshu Zhao and Donald Yeung", title = "Using Multicore Reuse Distance to Study Coherence Directories", journal = j-TOCS, volume = "35", number = "2", pages = "4:1--4:??", month = oct, year = "2017", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3092702", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Oct 10 17:48:24 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Researchers have proposed numerous techniques to improve the scalability of coherence directories. The effectiveness of these techniques not only depends on application behavior, but also on the CPU's configuration, for example, its core count and cache size. 
As CPUs continue to scale, it is essential to explore the directory's application and architecture dependencies. However, this is challenging given the slow speed of simulators. While it is common practice to simulate different applications, previous research on directory designs have explored only a few---and in most cases, only one---CPU configuration, which can lead to an incomplete and inaccurate view of the directory's behavior. This article proposes to use multicore reuse distance analysis to study coherence directories. We develop a framework to extract the directory access stream from parallel least recently used (LRU) stacks, enabling rapid analysis of the directory's accesses and contents across both core count and cache size scaling. A key part of our framework is the notion of relative reuse distance between sharers, which defines sharing in a capacity-dependent fashion and facilitates our analyses along the data cache size dimension. We implement our framework in a profiler and then apply it to gain insights into the impact of multicore CPU scaling on directory behavior. Our profiling results show that directory accesses reduce by 3.3$ \times $ when scaling the data cache size from 16KB to 1MB, despite an increase in sharing-based directory accesses. We also show that increased sharing caused by data cache scaling allows the portion of on-chip memory occupied by the directory to be reduced by 43.3\%, compared to a reduction of only 2.6\% when scaling the number of cores. And, we show certain directory entries exhibit high temporal reuse. In addition to gaining insights, we also validate our profile-based results, and find they are within 2--10\% of cache simulations on average, across different validation experiments. Finally, we conduct four case studies that illustrate our insights on existing directory techniques.
In particular, we demonstrate our directory occupancy insights on a Cuckoo directory; we apply our sharing insights to provide bounds on the size of Scalable Coherence Directories (SCD) and Dual-Grain Directories (DGD); and, we demonstrate our directory entry reuse insights on a multilevel directory design.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Chun:2017:ARR, author = "Byung-Gon Chun and Tyson Condie and Yingda Chen and Brian Cho and Andrew Chung and Carlo Curino and Chris Douglas and Matteo Interlandi and Beomyeol Jeon and Joo Seong Jeong and Gyewon Lee and Yunseong Lee and Tony Majestro and Dahlia Malkhi and Sergiy Matusevych and Brandon Myers and Mariia Mykhailova and Shravan Narayanamurthy and Joseph Noor and Raghu Ramakrishnan and Sriram Rao and Russell Sears and Beysim Sezgin and Taegeon Um and Julia Wang and Markus Weimer and Youngseok Yang", title = "{Apache REEF}: Retainable Evaluator Execution Framework", journal = j-TOCS, volume = "35", number = "2", pages = "5:1--5:??", month = oct, year = "2017", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3132037", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Oct 10 17:48:24 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Resource Managers like YARN and Mesos have emerged as a critical layer in the cloud computing system stack, but the developer abstractions for leasing cluster resources and instantiating application logic are very low level. This flexibility comes at a high cost in terms of developer effort, as each application must repeatedly tackle the same challenges (e.g., fault tolerance, task scheduling and coordination) and reimplement common mechanisms (e.g., caching, bulk-data transfers). 
This article presents REEF, a development framework that provides a control plane for scheduling and coordinating task-level (data-plane) work on cluster resources obtained from a Resource Manager. REEF provides mechanisms that facilitate resource reuse for data caching and state management abstractions that greatly ease the development of elastic data processing pipelines on cloud platforms that support a Resource Manager service. We illustrate the power of REEF by showing applications built atop: a distributed shell application, a machine-learning framework, a distributed in-memory caching system, and a port of the CORFU system. REEF is currently an Apache top-level project that has attracted contributors from several institutions and it is being used to develop several commercial offerings such as the Azure Stream Analytics service.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Shen:2017:SLC, author = "Zhiming Shen and Qin Jia and Gur-Eyal Sela and Weijia Song and Hakim Weatherspoon and Robbert {Van Renesse}", title = "{Supercloud}: a Library Cloud for Exploiting Cloud Diversity", journal = j-TOCS, volume = "35", number = "2", pages = "6:1--6:??", month = oct, year = "2017", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3132038", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Oct 10 17:48:24 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tocs/; https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Infrastructure-as-a-Service (IaaS) cloud providers hide available interfaces for virtual machine (VM) placement and migration, CPU capping, memory ballooning, page sharing, and I/O throttling, limiting the ways in which applications can optimally configure resources or respond to dynamically shifting workloads. 
Given these interfaces, applications could migrate VMs in response to diurnal workloads or changing prices, adjust resources in response to load changes, and so on. This article proposes a new abstraction that we call a Library Cloud and that allows users to customize the diverse available cloud resources to best serve their applications. We built a prototype of a Library Cloud that we call the Supercloud. The Supercloud encapsulates applications in a virtual cloud under users' full control and can incorporate one or more availability zones within a cloud provider or across different providers. The Supercloud provides virtual machine, storage, and networking complete with a full set of management operations, allowing applications to optimize performance. In this article, we demonstrate various innovations enabled by the Library Cloud.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Diegues:2017:SPS, author = "Nuno Diegues and Paolo Romano and Stoyan Garbatov", title = "{Seer}: Probabilistic Scheduling for Hardware Transactional Memory", journal = j-TOCS, volume = "35", number = "3", pages = "7:1--7:41", month = dec, year = "2017", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3132036", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Dec 27 09:34:24 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "The ubiquity of multicore processors has led programmers to write parallel and concurrent applications to take advantage of the underlying hardware and speed up their executions. In this context, Transactional Memory (TM) has emerged as a simple and effective synchronization paradigm, via the familiar abstraction of atomic transactions. 
After many years of intense research, major processor manufacturers (including Intel) have recently released mainstream processors with hardware support for TM (HTM). In this work, we study a relevant issue with great impact on the performance of HTM. Due to the optimistic and inherently limited nature of HTM, transactions may have to be aborted and restarted numerous times, without any progress guarantee. As a result, it is up to the software library that regulates the HTM usage to ensure progress and optimize performance. Transaction scheduling is probably one of the most well-studied and effective techniques to achieve these goals. However, these recent mainstream HTMs have some technical limitations that prevent the adoption of known scheduling techniques: unlike software implementations of TM used in the past, existing HTMs provide limited or no information on which memory regions or contending transactions caused the abort. To address this crucial issue for HTMs, we propose Seer, a software scheduler that addresses precisely this restriction of HTM by leveraging on an online probabilistic inference technique that identifies the most likely conflict relations and establishes a dynamic locking scheme to serialize transactions in a fine-grained manner. The key idea of our solution is to constrain the portions of parallelism that are affecting negatively the whole system. As a result, this not only prevents performance reduction but also in fact unveils further scalability and performance for HTM. 
Via an extensive evaluation study, we show that Seer improves the performance of the Intel's HTM by up to 3.6$ \times $, and by 65\% on average across all concurrency degrees and benchmarks on a large processor with 28 cores.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Nishtala:2017:HAI, author = "Rajiv Nishtala and Paul Carpenter and Vinicius Petrucci and Xavier Martorell", title = "The {Hipster} Approach for Improving Cloud System Efficiency", journal = j-TOCS, volume = "35", number = "3", pages = "8:1--8:28", month = dec, year = "2017", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3144168", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Dec 27 09:34:24 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "In 2013, U.S. data centers accounted for 2.2\% of the country's total electricity consumption, a figure that is projected to increase rapidly over the next decade. Many important data center workloads in cloud computing are interactive, and they demand strict levels of quality-of-service (QoS) to meet user expectations, making it challenging to optimize power consumption along with increasing performance demands. This article introduces Hipster, a technique that combines heuristics and reinforcement learning to improve resource efficiency in cloud systems. Hipster explores heterogeneous multi-cores and dynamic voltage and frequency scaling for reducing energy consumption while managing the QoS of the latency-critical workloads. To improve data center utilization and make best usage of the available resources, Hipster can dynamically assign remaining cores to batch workloads without violating the QoS constraints for the latency-critical workloads. 
We perform experiments using a 64-bit ARM big.LITTLE platform and show that, compared to prior work, Hipster improves the QoS guarantee for Web-Search from 80\% to 96\%, and for Memcached from 92\% to 99\%, while reducing the energy consumption by up to 18\%. Hipster is also effective in learning and adapting automatically to specific requirements of new incoming workloads just enough to meet the QoS and optimize resource consumption.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Cherupalli:2017:DAS, author = "Hari Cherupalli and Henry Duwe and Weidong Ye and Rakesh Kumar and John Sartori", title = "Determining Application-Specific Peak Power and Energy Requirements for Ultra-Low-Power Processors", journal = j-TOCS, volume = "35", number = "3", pages = "9:1--9:33", month = dec, year = "2017", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3148052", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Wed Dec 27 09:34:24 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Many emerging applications such as the Internet of Things, wearables, implantables, and sensor networks are constrained by power and energy. These applications rely on ultra-low-power processors that have rapidly become the most abundant type of processor manufactured today. In the ultra-low-power embedded systems used by these applications, peak power and energy requirements are the primary factors that determine critical system characteristics, such as size, weight, cost, and lifetime. While the power and energy requirements of these systems tend to be application specific, conventional techniques for rating peak power and energy cannot accurately bound the power and energy requirements of an application running on a processor, leading to overprovisioning that increases system size and weight. 
In this article, we present an automated technique that performs hardware-software coanalysis of the application and ultra-low-power processor in an embedded system to determine application-specific peak power and energy requirements. Our technique provides more accurate, tighter bounds than conventional techniques for determining peak power and energy requirements. Also, unlike conventional approaches, our technique reports guaranteed bounds on peak power and energy independent of an application's input set. Tighter bounds on peak power and energy can be exploited to reduce system size, weight, and cost.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Belay:2017:CIO, author = "Adam Belay and George Prekas and Mia Primorac and Ana Klimovic and Samuel Grossman and Christos Kozyrakis and Edouard Bugnion", title = "Corrigendum to {``The IX Operating System: Combining Low Latency, High Throughput and Efficiency in a Protected Dataplane''}", journal = j-TOCS, volume = "35", number = "3", pages = "10:1--10:1", month = dec, year = "2017", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3154292", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Dec 29 17:57:41 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", note = "See \cite{Belay:2017:IOS}.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Mace:2018:PTD, author = "Jonathan Mace and Ryan Roelke and Rodrigo Fonseca", title = "Pivot Tracing: Dynamic Causal Monitoring for Distributed Systems", journal = j-TOCS, volume = "35", number = "4", pages = "11:1--11:??", month = dec, year = "2018", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3208104", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", 
bibdate = "Sat Sep 21 11:44:29 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3208104", abstract = "Monitoring and troubleshooting distributed systems is notoriously difficult; potential problems are complex, varied, and unpredictable. The monitoring and diagnosis tools commonly used today --- logs, counters, and metrics --- have two important limitations: what gets recorded is defined a priori, and the information is recorded in a component- or machine-centric way, making it extremely hard to correlate events that cross these boundaries. This article presents Pivot Tracing, a monitoring framework for distributed systems that addresses both limitations by combining dynamic instrumentation with a novel relational operator: the happened-before join. Pivot Tracing gives users, at runtime, the ability to define arbitrary metrics at one point of the system, while being able to select, filter, and group by events meaningful at other parts of the system, even when crossing component or machine boundaries. We have implemented a prototype of Pivot Tracing for Java-based systems and evaluate it on a heterogeneous Hadoop cluster comprising HDFS, HBase, MapReduce, and YARN. We show that Pivot Tracing can effectively identify a diverse range of root causes such as software bugs, misconfiguration, and limping hardware. We show that Pivot Tracing is dynamic, extensible, and enables cross-tier analysis between inter-operating applications, with low execution overhead.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Zhang:2018:BCT, author = "Irene Zhang and Naveen Kr. Sharma and Adriana Szekeres and Arvind Krishnamurthy and Dan R. K.
Ports", title = "Building Consistent Transactions with Inconsistent Replication", journal = j-TOCS, volume = "35", number = "4", pages = "12:1--12:??", month = dec, year = "2018", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3269981", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Sep 21 11:44:29 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3269981", abstract = "Application programmers increasingly prefer distributed storage systems with strong consistency and distributed transactions (e.g., Google's Spanner) for their strong guarantees and ease of use. Unfortunately, existing transactional storage systems are expensive to use --- in part, because they require costly replication protocols, like Paxos, for fault tolerance. In this article, we present a new approach that makes transactional storage systems more affordable: We eliminate consistency from the replication protocol, while still providing distributed transactions with strong consistency to applications. We present the Transactional Application Protocol for Inconsistent Replication (TAPIR), the first transaction protocol to use a novel replication protocol, called inconsistent replication, that provides fault tolerance without consistency. By enforcing strong consistency only in the transaction protocol, TAPIR can commit transactions in a single round-trip and order distributed transactions without centralized coordination. We demonstrate the use of TAPIR in a transactional key-value store, TAPIR-KV.
Compared to conventional systems, TAPIR-KV provides better latency and better throughput.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Hunt:2018:RDS, author = "Tyler Hunt and Zhiting Zhu and Yuanzhong Xu and Simon Peter and Emmett Witchel", title = "{Ryoan}: a Distributed Sandbox for Untrusted Computation on Secret Data", journal = j-TOCS, volume = "35", number = "4", pages = "13:1--13:??", month = dec, year = "2018", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3231594", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Sep 21 11:44:29 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3231594", abstract = "Users of modern data-processing services such as tax preparation or genomic screening are forced to trust them with data that the users wish to keep secret. Ryoan protects secret data while it is processed by services that the data owner does not trust. Accomplishing this goal in a distributed setting is difficult, because the user has no control over the service providers or the computational platform. Confining code to prevent it from leaking secrets is notoriously difficult, but Ryoan benefits from new hardware and a request-oriented data model. Ryoan provides a distributed sandbox, leveraging hardware enclaves (e.g., Intel's software guard extensions (SGX) [40]) to protect sandbox instances from potentially malicious computing platforms. The protected sandbox instances confine untrusted data-processing modules to prevent leakage of the user's input data. Ryoan is designed for a request-oriented data model, where confined modules only process input once and do not persist state about the input.
We present the design and prototype implementation of Ryoan and evaluate it on a series of challenging problems including email filtering, health analysis, image processing and machine translation.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Guerraoui:2019:LUA, author = "Rachid Guerraoui and Hugo Guiroux and Renaud Lachaize and Vivien Qu{\'e}ma and Vasileios Trigonakis", title = "Lock--Unlock: Is That All? {A} Pragmatic Analysis of Locking in Software Systems", journal = j-TOCS, volume = "36", number = "1", pages = "1:1--1:??", month = mar, year = "2019", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3301501", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Sep 21 11:44:29 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3301501", abstract = "A plethora of optimized mutex lock algorithms have been designed over the past 25 years to mitigate performance bottlenecks related to critical sections and locks. Unfortunately, there is currently no broad study of the behavior of these optimized lock algorithms on realistic applications that consider different performance metrics, such as energy efficiency and tail latency. In this article, we perform a thorough and practical analysis of synchronization, with the goal of providing software developers with enough information to design fast, scalable, and energy-efficient synchronization in their systems. First, we perform a performance study of 28 state-of-the-art mutex lock algorithms, on 40 applications, on four different multicore machines. We consider not only throughput (traditionally the main performance metric) but also energy efficiency and tail latency, which are becoming increasingly important. 
Second, we present an in-depth analysis in which we summarize our findings for all the studied applications. In particular, we describe nine different lock-related performance bottlenecks, and we propose six guidelines helping software developers with their choice of a lock algorithm according to the different lock properties and the application characteristics. From our detailed analysis, we make several observations regarding locking algorithms and application behaviors, several of which have not been previously discovered: (i) applications stress not only the lock-unlock interface but also the full locking API (e.g., trylocks, condition variables); (ii) the memory footprint of a lock can directly affect the application performance; (iii) for many applications, the interaction between locks and scheduling is an important application performance factor; (iv) lock tail latencies may or may not affect application tail latency; (v) no single lock is systematically the best; (vi) choosing the best lock is difficult; and (vii) energy efficiency and throughput go hand in hand in the context of lock algorithms. These findings highlight that locking involves more considerations than the simple lock/unlock interface and call for further research on designing low-memory footprint adaptive locks that fully and efficiently support the full lock interface, and consider all performance metrics.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Zhao:2019:VER, author = "Boyan Zhao and Rui Hou and Jianbo Dong and Michael Huang and Sally A.
McKee and Qianlong Zhang and Yueji Liu and Ye Li and Lixin Zhang and Dan Meng", title = "{Venice}: an Effective Resource Sharing Architecture for Data Center Servers", journal = j-TOCS, volume = "36", number = "1", pages = "2:1--2:??", month = mar, year = "2019", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3310360", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Sep 21 11:44:29 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3310360", abstract = "Consolidated server racks are quickly becoming the standard infrastructure for engineering, business, medicine, and science. Such servers are still designed much in the way when they were organized as individual, distributed systems. Given that many fields rely on big-data analytics substantially, its cost-effectiveness and performance should be improved, which can be achieved by flexibly allowing resources to be shared across nodes. Here we describe Venice, a family of data-center server architectures that includes a strong communication substrate as a first-class resource. Venice supports a diverse set of resource-joining mechanisms that enables applications to leverage non-local resources efficiently. We have constructed a hardware prototype to better understand the implications of design decisions about system support for resource sharing. We use it to measure the performance of at-scale applications and to explore performance, power, and resource-sharing transparency tradeoffs (i.e., how many programming changes are needed). We analyze these tradeoffs for sharing memory, accelerators, and NICs.
We find that reducing/hiding latency is particularly important, the chosen communication channels should match the sharing access patterns of the applications, and of which we can improve performance by exploiting inter-channel collaboration.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Shi:2019:DGC, author = "Xuanhua Shi and Zhixiang Ke and Yongluan Zhou and Hai Jin and Lu Lu and Xiong Zhang and Ligang He and Zhenyu Hu and Fei Wang", title = "{Deca}: a Garbage Collection Optimizer for In-Memory Data Processing", journal = j-TOCS, volume = "36", number = "1", pages = "3:1--3:??", month = mar, year = "2019", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3310361", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Sep 21 11:44:29 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3310361", abstract = "In-memory caching of intermediate data and active combining of data in shuffle buffers have been shown to be very effective in minimizing the recomputation and I/O cost in big data processing systems such as Spark and Flink. However, it has also been widely reported that these techniques would create a large amount of long-living data objects in the heap. These generated objects may quickly saturate the garbage collector, especially when handling a large dataset, and hence, limit the scalability of the system. To eliminate this problem, we propose a lifetime-based memory management framework, which, by automatically analyzing the user-defined functions and data types, obtains the expected lifetime of the data objects and then allocates and releases memory space accordingly to minimize the garbage collection overhead. 
In particular, we present Deca, a concrete implementation of our proposal on top of Spark, which transparently decomposes and groups objects with similar lifetimes into byte arrays and releases their space altogether when their lifetimes come to an end. When systems are processing very large data, Deca also provides field-oriented memory pages to ensure high compression efficiency. Extensive experimental studies using both synthetic and real datasets show that, in comparing to Spark, Deca is able to (1) reduce the garbage collection time by up to 99.9\%, (2) reduce the memory consumption by up to 46.6\% and the storage space by 23.4\%, (3) achieve 1.2$ \times $ to 22.7$ \times $ speedup in terms of execution time in cases without data spilling and 16$ \times $ to 41.6$ \times $ speedup in cases with data spilling, and (4) provide similar performance compared to domain-specific systems.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Jha:2019:DFS, author = "Sagar Jha and Jonathan Behrens and Theo Gkountouvas and Matthew Milano and Weijia Song and Edward Tremel and Robbert {Van Renesse} and Sydney Zink and Kenneth P. Birman", title = "{Derecho}: Fast State Machine Replication for Cloud Services", journal = j-TOCS, volume = "36", number = "2", pages = "4:1--4:??", month = apr, year = "2019", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3302258", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Sep 21 11:44:30 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", note = "See corrigendum \cite{Jha:2020:CDF}.", URL = "https://dl.acm.org/ft_gateway.cfm?id=3302258", abstract = "Cloud computing services often replicate data and may require ways to coordinate distributed actions. Here we present Derecho, a library for such tasks.
The API provides interfaces for structuring applications into patterns of subgroups and shards, supports state machine replication within them, and includes mechanisms that assist in restart after failures. Running over 100Gbps RDMA, Derecho can send millions of events per second in each subgroup or shard and throughput peaks at 16GB/s, substantially outperforming prior solutions. Configured to run purely on TCP, Derecho is still substantially faster than comparable widely used, highly-tuned, standard tools. The key insight is that on modern hardware (including non-RDMA networks), data-intensive protocols should be built from non-blocking data-flow components.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Bergman:2019:SSO, author = "Shai Bergman and Tanya Brokhman and Tzachi Cohen and Mark Silberstein", title = "{SPIN}: Seamless Operating System Integration of Peer-to-Peer {DMA} Between {SSDs} and {GPUs}", journal = j-TOCS, volume = "36", number = "2", pages = "5:1--5:??", month = apr, year = "2019", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3309987", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Sep 21 11:44:30 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3309987", abstract = "Recent GPUs enable Peer-to-Peer Direct Memory Access (p2p) from fast peripheral devices like NVMe SSDs to exclude the CPU from the data path between them for efficiency. Unfortunately, using p2p to access files is challenging because of the subtleties of low-level non-standard interfaces, which bypass the OS file I/O layers and may hurt system performance. Developers must possess intimate knowledge of low-level interfaces to manually handle the subtleties of data consistency and misaligned accesses.
We present SPIN, which integrates p2p into the standard OS file I/O stack, dynamically activating p2p where appropriate, transparently to the user. It combines p2p with page cache accesses, re-enables read-ahead for sequential reads, all while maintaining standard POSIX FS consistency, portability across GPUs and SSDs, and compatibility with virtual block devices such as software RAID. We evaluate SPIN on NVIDIA and AMD GPUs using standard file I/O benchmarks, application traces, and end-to-end experiments. SPIN achieves significant performance speedups across a wide range of workloads, exceeding p2p throughput by up to an order of magnitude. It also boosts the performance of an aerial imagery rendering application by 2.6$ \times $ by dynamically adapting to its input-dependent file access pattern, enables 3.3$ \times $ higher throughput for a GPU-accelerated log server, and enables 29\% faster execution for the highly optimized GPU-accelerated image collage with only 30 changed lines of code.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Novakovic:2019:MLI, author = "Stanko Novakovic and Alexandros Daglis and Dmitrii Ustiugov and Edouard Bugnion and Babak Falsafi and Boris Grot", title = "Mitigating Load Imbalance in Distributed Data Serving with Rack-Scale Memory Pooling", journal = j-TOCS, volume = "36", number = "2", pages = "6:1--6:??", month = apr, year = "2019", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3309986", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Sep 21 11:44:30 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3309986", abstract = "To provide low-latency and high-throughput guarantees, most large key-value stores keep the data in the memory of many servers. 
Despite the natural parallelism across lookups, the load imbalance, introduced by heavy skew in the popularity distribution of keys, limits performance. To avoid violating tail latency service-level objectives, systems tend to keep server utilization low and organize the data in micro-shards, which provides units of migration and replication for the purpose of load balancing. These techniques reduce the skew but incur additional monitoring, data replication, and consistency maintenance overheads. In this work, we introduce RackOut, a memory pooling technique that leverages the one-sided remote read primitive of emerging rack-scale systems to mitigate load imbalance while respecting service-level objectives. In RackOut, the data are aggregated at rack-scale granularity, with all of the participating servers in the rack jointly servicing all of the rack's micro-shards. We develop a queuing model to evaluate the impact of RackOut at the datacenter scale. In addition, we implement a RackOut proof-of-concept key-value store, evaluate it on two experimental platforms based on RDMA and Scale-Out NUMA, and use these results to validate the model. We devise two distinct approaches to load balancing within a RackOut unit, one based on random selection of nodes --- RackOut\_static --- and another one based on an adaptive load balancing mechanism --- RackOut\_adaptive. Our results show that RackOut\_static increases throughput by up to 6$ \times $ for RDMA and 8.6$ \times $ for Scale-Out NUMA compared to a scale-out deployment, while respecting tight tail latency service-level objectives.
RackOut\_adaptive improves the throughput by 30\% for workloads with 20\% of writes over RackOut\_static.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Iturbe:2019:ATC, author = "Xabier Iturbe and Balaji Venu and Emre Ozer and Jean-Luc Poupat and Gregoire Gimenez and Hans-Ulrich Zurek", title = "The {Arm Triple Core Lock-Step (TCLS)} Processor", journal = j-TOCS, volume = "36", number = "3", pages = "7:1--7:??", month = aug, year = "2019", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3323917", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Sep 21 11:44:30 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3323917", abstract = "The Arm Triple Core Lock-Step (TCLS) architecture is the natural evolution of Arm Cortex-R Dual Core Lock-Step (DCLS) processors to increase dependability, predictability, and availability in safety-critical and ultra-reliable applications. TCLS is simple, scalable, and easy to deploy in applications where Arm DCLS processors are widely used (e.g., automotive), as well as in new sectors where the presence of Arm technology is incipient (e.g., enterprise) or almost non-existent (e.g., space). Specifically in space, COTS Arm processors provide optimal power-to-performance, extensibility, evolvability, software availability, and ease of use, especially in comparison with the decades old rad-hard computing solutions that are still in use. This article discusses the fundamentals of an Arm Cortex-R5 based TCLS processor, providing key functioning and implementation details. 
The article shows that the TCLS architecture keeps the use of rad-hard technology to a minimum, namely, using rad-hard by design standard cell libraries only to protect the critical parts that account for less than 4\% of the entire TCLS solution. Moreover, when exposure to radiation is relatively low, such as in terrestrial applications or even satellites operating in Low Earth Orbits (LEO), the system could be implemented entirely using commercial cell libraries, relying on the radiation mitigation methods implemented on the TCLS to cope with sporadic soft errors in its critical parts. The TCLS solution allows thus to significantly reduce chip manufacturing costs and keep pace with advances in low power consumption and high density integration by leveraging commercial semiconductor processes, while matching the reliability levels and improving availability that can be achieved using extremely expensive rad-hard semiconductor processes. Finally, the article describes a TRL4 proof-of-concept TCLS-based System-on-Chip (SoC) that has been prototyped and tested to power the computer on-board an Airbus Defence and Space telecom satellite. When compared to the currently used processor solution by Airbus, the TCLS-based SoC results in a more than 5$ \times $ performance increase and cuts power consumption by more than half.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Ainsworth:2019:SPI, author = "Sam Ainsworth and Timothy M. 
Jones", title = "Software Prefetching for Indirect Memory Accesses: a Microarchitectural Perspective", journal = j-TOCS, volume = "36", number = "3", pages = "8:1--8:??", month = aug, year = "2019", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3319393", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Sep 21 11:44:30 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3319393", abstract = "Many modern data processing and HPC workloads are heavily memory-latency bound. A tempting proposition to solve this is software prefetching, where special non-blocking loads are used to bring data into the cache hierarchy just before being required. However, these are difficult to insert to effectively improve performance, and techniques for automatic insertion are currently limited. This article develops a novel compiler pass to automatically generate software prefetches for indirect memory accesses, a special class of irregular memory accesses often seen in high-performance workloads. We evaluate this across a wide set of systems, all of which gain benefit from the technique. We then evaluate the extent to which good prefetch instructions are architecture dependent and the class of programs that are particularly amenable. 
Across a set of memory-bound benchmarks, our automated pass achieves average speedups of 1.3$ \times $ for an Intel Haswell processor, 1.1$ \times $ for both an ARM Cortex-A57 and Qualcomm Kryo, 1.2$ \times $ for a Cortex-72 and an Intel Kaby Lake, and 1.35$ \times $ for an Intel Xeon Phi Knight's Landing, each of which is an out-of-order core, and performance improvements of 2.1$ \times $ and 2.7$ \times $ for the in-order ARM Cortex-A53 and first generation Intel Xeon Phi.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Chen:2019:ISA, author = "Yunji Chen and Huiying Lan and Zidong Du and Shaoli Liu and Jinhua Tao and Dong Han and Tao Luo and Qi Guo and Ling Li and Yuan Xie and Tianshi Chen", title = "An Instruction Set Architecture for Machine Learning", journal = j-TOCS, volume = "36", number = "3", pages = "9:1--9:??", month = aug, year = "2019", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3331469", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Sep 21 11:44:30 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3331469", abstract = "Machine Learning (ML) are a family of models for learning from the data to improve performance on a certain task. ML techniques, especially recent renewed neural networks (deep neural networks), have proven to be efficient for a broad range of applications. ML techniques are conventionally executed on general-purpose processors (such as CPU and GPGPU), which usually are not energy efficient, since they invest excessive hardware resources to flexibly support various workloads. Consequently, application-specific hardware accelerators have been proposed recently to improve energy efficiency. 
However, such accelerators were designed for a small set of ML techniques sharing similar computational patterns, and they adopt complex and informative instructions (control signals) directly corresponding to high-level functional blocks of an ML technique (such as layers in neural networks) or even an ML as a whole. Although straightforward and easy to implement for a limited set of similar ML techniques, the lack of agility in the instruction set prevents such accelerator designs from supporting a variety of different ML techniques with sufficient flexibility and efficiency. In this article, we first propose a novel domain-specific Instruction Set Architecture (ISA) for NN accelerators, called Cambricon, which is a load-store architecture that integrates scalar, vector, matrix, logical, data transfer, and control instructions, based on a comprehensive analysis of existing NN techniques. We then extend the application scope of Cambricon from NN to ML techniques. We also propose an assembly language, an assembler, and runtime to support programming with Cambricon, especially targeting large-scale ML problems. Our evaluation over a total of 16 representative yet distinct ML techniques have demonstrated that Cambricon exhibits strong descriptive capacity over a broad range of ML techniques and provides higher code density than general-purpose ISAs such as x86, MIPS, and GPGPU. Compared to the latest state-of-the-art NN accelerator design DaDianNao [7] (which can only accommodate three types of NN techniques), our Cambricon-based accelerator prototype implemented in TSMC 65nm technology incurs only negligible latency/power/area overheads, with a versatile coverage of 10 different NN benchmarks and 7 other ML benchmarks. 
Compared to the recent prevalent ML accelerator PuDianNao, our Cambricon-based accelerator is able to support all the ML techniques as well as the 10 NNs but with only approximate 5.1\% performance loss.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Computer Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774", } @Article{Bai:2020:EDS, author = "Jia-Ju Bai and Julia Lawall and Shi-Min Hu", title = "Effective Detection of Sleep-in-atomic-context Bugs in the {Linux} Kernel", journal = j-TOCS, volume = "36", number = "4", pages = "10:1--10:30", month = jun, year = "2020", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3381990", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Jun 12 07:20:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3381990", abstract = "Atomic context is an execution state of the Linux kernel in which kernel code monopolizes a CPU core. In this state, the Linux kernel may only perform operations that cannot sleep, as otherwise a system hang or crash may occur. We refer to this kind of concurrency bug as a sleep-in-atomic-context (SAC) bug. In practice, SAC bugs are hard to find, as they do not cause problems in all executions.\par In this article, we propose a practical static approach named DSAC to effectively detect SAC bugs in the Linux kernel. DSAC uses three key techniques: (1) a summary-based analysis to identify the code that may be executed in atomic context, (2) a connection-based alias analysis to identify the set of functions referenced by a function pointer, and (3) a path-check method to filter out repeated reports and false bugs. We evaluate DSAC on Linux 4.17 and find 1,159 SAC bugs. We manually check all the bugs and find that 1,068 bugs are real. 
We have randomly selected 300 of the real bugs and sent them to kernel developers. 220 of these bugs have been confirmed, and 51 of our patches fixing 115 bugs have been applied.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Malkhi:2020:ISI, author = "Dahlia Malkhi and Dan Tsafrir", title = "Introduction to the Special Issue on the Award Papers of {USENIX ATC 2019}", journal = j-TOCS, volume = "36", number = "4", pages = "11:1--11:2", month = jun, year = "2020", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3395034", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Jun 12 07:20:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3395034", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Balmau:2020:SPL, author = "Oana Balmau and Florin Dinu and Willy Zwaenepoel and Karan Gupta and Ravishankar Chandhiramoorthi and Diego Didona", title = "{SILK+} Preventing Latency Spikes in Log-Structured Merge Key--Value Stores Running Heterogeneous Workloads", journal = j-TOCS, volume = "36", number = "4", pages = "12:1--12:27", month = jun, year = "2020", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3380905", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Jun 12 07:20:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3380905", abstract = "Log-Structured Merge Key-Value stores (LSM KVs) are designed to offer good write performance, by capturing client writes in memory, and only later flushing them to storage. 
Writes are later compacted into a tree-like data structure on disk to improve \ldots{}", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Leesatapornwongsa:2020:TWT, author = "Tanakorn Leesatapornwongsa and Aritra Sengupta and Masoud Saeida Ardekani and Gustavo Petri and Cesar A. Stuardo", title = "Transactuations: Where Transactions Meet the Physical World", journal = j-TOCS, volume = "36", number = "4", pages = "13:1--13:31", month = jun, year = "2020", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3380907", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Jun 12 07:20:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3380907", abstract = "A large class of IoT applications read sensors, execute application logic, and actuate actuators. However, the lack of high-level programming abstractions compromises correctness, especially in the presence of failures and unwanted interleaving between \ldots{}", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Spink:2020:RSL, author = "Tom Spink and Harry Wagstaff and Bj{\"o}rn Franke", title = "A Retargetable System-level {DBT} Hypervisor", journal = j-TOCS, volume = "36", number = "4", pages = "14:1--14:24", month = jun, year = "2020", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3386161", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Jun 12 07:20:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3386161", abstract = "System-level Dynamic Binary Translation (DBT) provides the capability to boot an Operating System (OS) and execute programs compiled for an Instruction Set Architecture (ISA) different 
from that of the host machine. Due to their performance-critical \ldots{}", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Jha:2020:CDF, author = "Sagar Jha", title = "Corrigendum to {``Derecho: Fast State Machine Replication for Cloud Services,'' by Jha et al., ACM Transactions on Computer Systems (TOCS) Volume {\bf 36}, Issue 2, Article No. 4}", journal = j-TOCS, volume = "36", number = "4", pages = "15:1--15:1", month = jun, year = "2020", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3395604", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Jun 12 07:20:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", note = "See \cite{Jha:2019:DFS}.", URL = "https://dl.acm.org/doi/abs/10.1145/3395604", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Sadrosadati:2021:HCL, author = "Mohammad Sadrosadati and Amirhossein Mirhosseini and Ali Hajiabadi and Seyed Borna Ehsani and Hajar Falahati and Hamid Sarbazi-Azad and Mario Drumond and Babak Falsafi and Rachata Ausavarungnirun and Onur Mutlu", title = "Highly Concurrent Latency-tolerant Register Files for {GPUs}", journal = j-TOCS, volume = "37", number = "1--4", pages = "1:1--1:36", month = mar, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3419973", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue May 25 09:04:45 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3419973", abstract = "Graphics Processing Units (GPUs) employ large register files to accommodate all active threads and accelerate context switching. 
Unfortunately, register files are a scalability bottleneck for future GPUs due to long access latency, high power consumption, and large silicon area provisioning. Prior work proposes hierarchical register file to reduce the register file power consumption by caching registers in a smaller register file cache. Unfortunately, this approach does not improve register access latency due to the low hit rate in the register file cache.\par In this article, we propose the Latency-Tolerant Register File (LTRF) architecture to achieve low latency in a two-level hierarchical structure while keeping power consumption low. We observe that compile-time interval analysis enables us to divide GPU program execution into intervals with an accurate estimate of a warp's aggregate register working-set within each interval. The key idea of LTRF is to prefetch the estimated register working-set from the main register file to the register file cache under software control, at the beginning of each interval, and overlap the prefetch latency with the execution of other warps. We observe that register bank conflicts while prefetching the registers could greatly reduce the effectiveness of LTRF. Therefore, we devise a compile-time register renumbering technique to reduce the likelihood of register bank conflicts. Our experimental results show that LTRF enables high-capacity yet long-latency main GPU register files, paving the way for various optimizations. 
As an example optimization, we implement the main register file with emerging high-density high-latency memory technologies, enabling $ 8 \times $ larger capacity and improving overall GPU performance by 34\%.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Zhang:2021:KSV, author = "Yiming Zhang and Chengfei Zhang and Yaozheng Wang and Kai Yu and Guangtao Xue and Jon Crowcroft", title = "{KylinX}: Simplified Virtualization Architecture for Specialized Virtual Appliances with Strong Isolation", journal = j-TOCS, volume = "37", number = "1--4", pages = "2:1--2:27", month = mar, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3436512", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue May 25 09:04:45 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "https://dl.acm.org/doi/10.1145/3436512", abstract = "Unikernel specializes a minimalistic LibOS and a target application into a standalone single-purpose virtual machine (VM) running on a hypervisor, which is referred to as (virtual) appliance. Compared to traditional VMs, Unikernel appliances have smaller memory footprint and lower overhead while guaranteeing the same level of isolation. On the downside, Unikernel strips off the process abstraction from its monolithic appliance and thus sacrifices flexibility, efficiency, and applicability.\par In this article, we examine whether there is a balance embracing the best of both Unikernel appliances (strong isolation) and processes (high flexibility/efficiency). We present KylinX, a dynamic library operating system for simplified and efficient cloud virtualization by providing the pVM (process-like VM) abstraction. 
A pVM takes the hypervisor as an OS and the Unikernel appliance as a process allowing both page-level and library-level dynamic mapping. At the page level, KylinX supports pVM fork plus a set of API for inter-pVM communication (IpC, which is compatible with conventional UNIX IPC). At the library level, KylinX supports shared libraries to be linked to a Unikernel appliance at runtime. KylinX enforces mapping restrictions against potential threats. We implement a prototype of KylinX by modifying MiniOS and Xen tools. Extensive experimental results show that KylinX achieves similar performance both in micro benchmarks (fork, IpC, library update, etc.) and in applications (Redis, web server, and DNS server) compared to conventional processes, while retaining the strong isolation benefit of VMs/Unikernels.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Richins:2021:ATH, author = "Daniel Richins and Dharmisha Doshi and Matthew Blackmore and Aswathy Thulaseedharan Nair and Neha Pathapati and Ankit Patel and Brainard Daguman and Daniel Dobrijalowski and Ramesh Illikkal and Kevin Long and David Zimmerman and Vijay Janapa Reddi", title = "{AI} Tax: The Hidden Cost of {AI} Data Center Applications", journal = j-TOCS, volume = "37", number = "1--4", pages = "3:1--3:32", month = mar, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3440689", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue May 25 09:04:45 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3440689", abstract = "Artificial intelligence and machine learning are experiencing widespread adoption in industry and academia. 
This has been driven by rapid advances in the applications and accuracy of AI through increasingly complex algorithms and models; this, in turn, has spurred research into specialized hardware AI accelerators. Given the rapid pace of advances, it is easy to forget that they are often developed and evaluated in a vacuum without considering the full application environment. This article emphasizes the need for a holistic, end-to-end analysis of artificial intelligence (AI) workloads and reveals the ``AI tax.'' We deploy and characterize Face Recognition in an edge data center. The application is an AI-centric edge video analytics application built using popular open source infrastructure and machine learning (ML) tools. Despite using state-of-the-art AI and ML algorithms, the application relies heavily on pre- and post-processing code. As AI-centric applications benefit from the acceleration promised by accelerators, we find they impose stresses on the hardware and software infrastructure: storage and network bandwidth become major bottlenecks with increasing AI acceleration. By specializing for AI applications, we show that a purpose-built edge data center can be designed for the stresses of accelerated AI at 15\% lower TCO than one derived from homogeneous servers and infrastructure.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Baskin:2021:UUN, author = "Chaim Baskin and Natan Liss and Eli Schwartz and Evgenii Zheltonozhskii and Raja Giryes and Alex M. 
Bronstein and Avi Mendelson", title = "{UNIQ}: Uniform Noise Injection for Non-Uniform Quantization of Neural Networks", journal = j-TOCS, volume = "37", number = "1--4", pages = "4:1--4:15", month = mar, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3444943", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue May 25 09:04:45 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3444943", abstract = "We present a novel method for neural network quantization. Our method, named UNIQ, emulates a non-uniform $k$-quantile quantizer and adapts the model to perform well with quantized weights by injecting noise to the weights at training time. As a by-product of injecting noise to weights, we find that activations can also be quantized to as low as 8-bit with only a minor accuracy degradation. Our non-uniform quantization approach provides a novel alternative to the existing uniform quantization techniques for neural networks. We further propose a novel complexity metric of number of bit operations performed (BOPs), and we show that this metric has a linear relation with logic utilization and power. We suggest evaluating the trade-off of accuracy vs. complexity (BOPs). The proposed method, when evaluated on ResNet18/34/50 and MobileNet on ImageNet, outperforms the prior state of the art both in the low-complexity regime and the high accuracy regime. 
We demonstrate the practical applicability of this approach, by implementing our non-uniformly quantized CNN on FPGA.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Zhuo:2021:DGP, author = "Youwei Zhuo and Jingji Chen and Gengyu Rao and Qinyi Luo and Yanzhi Wang and Hailong Yang and Depei Qian and Xuehai Qian", title = "Distributed Graph Processing System and Processing-in-memory Architecture with Precise Loop-carried Dependency Guarantee", journal = j-TOCS, volume = "37", number = "1--4", pages = "5:1--5:37", month = jun, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3453681", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Jul 2 08:25:18 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3453681", abstract = "To hide the complexity of the underlying system, graph processing frameworks ask programmers to specify graph computations in user-defined functions (UDFs) of graph-oriented programming model. 
Due to the nature of distributed execution, current \ldots{}", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Agate:2021:SSE, author = "Vincenzo Agate and Alessandra {De Paola} and Giuseppe {Lo Re} and Marco Morana", title = "A Simulation Software for the Evaluation of Vulnerabilities in Reputation Management Systems", journal = j-TOCS, volume = "37", number = "1--4", pages = "6:1--6:30", month = jun, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3458510", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Jul 2 08:25:18 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3458510", abstract = "Multi-agent distributed systems are characterized by autonomous entities that interact with each other to provide, and/or request, different kinds of services. In several contexts, especially when a reward is offered according to the quality of service, \ldots{}", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Ruaro:2021:MDM, author = "Marcelo Ruaro and Anderson Sant'ana and Axel Jantsch and Fernando Gehm Moraes", title = "Modular and Distributed Management of Many-Core {SoCs}", journal = j-TOCS, volume = "38", number = "1--2", pages = "1:1--1:16", month = jul, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3458511", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Aug 10 13:25:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3458511", abstract = "Many-Core Systems-on-Chip increasingly require Dynamic Multi-objective Management (DMOM) of resources. 
DMOM uses different management components for objectives and resources to implement comprehensive and self-adaptive system resource management. DMOMs \ldots{}", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Markussen:2021:SZO, author = "Jonas Markussen and Lars Bj{\o}rlykke Kristiansen and P{\aa}l Halvorsen and Halvor Kielland-Gyrud and H{\aa}kon Kvale Stensland and Carsten Griwodz", title = "{SmartIO}: Zero-overhead Device Sharing through {PCIe} Networking", journal = j-TOCS, volume = "38", number = "1--2", pages = "2:1--2:78", month = jul, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3462545", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Aug 10 13:25:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3462545", abstract = "The large variety of compute-heavy and data-driven applications accelerate the need for a distributed I/O solution that enables cost-effective scaling of resources between networked hosts. For example, in a cluster system, different machines may have \ldots{}", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Katsikas:2021:MHP, author = "Georgios P. Katsikas and Tom Barbette and Dejan Kosti{\'c} and Gerald Q. 
{Maguire, Jr.} and Rebecca Steinert", title = "{Metron}: High-performance {NFV} Service Chaining Even in the Presence of Blackboxes", journal = j-TOCS, volume = "38", number = "1--2", pages = "3:1--3:45", month = jul, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3465628", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Aug 10 13:25:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3465628", abstract = "Deployment of 100 Gigabit Ethernet (GbE) links challenges the packet processing limits of commodity hardware used for Network Functions Virtualization (NFV). Moreover, realizing chained network functions (i.e., service chains) necessitates the use of \ldots{}", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Zuo:2021:SIS, author = "Zhiqiang Zuo and Kai Wang and Aftab Hussain and Ardalan Amiri Sani and Yiyu Zhang and Shenming Lu and Wensheng Dou and Linzhang Wang and Xuandong Li and Chenxi Wang and Guoqing Harry Xu", title = "Systemizing Interprocedural Static Analysis of Large-scale Systems Code with {Graspan}", journal = j-TOCS, volume = "38", number = "1--2", pages = "4:1--4:39", month = jul, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3466820", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Tue Aug 10 13:25:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3466820", abstract = "There is more than a decade-long history of using static analysis to find bugs in systems such as Linux. Most of the existing static analyses developed for these systems are simple checkers that find bugs based on pattern matching.
Despite the presence \ldots{}", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Song:2021:ANF, author = "Won Wook Song and Youngseok Yang and Jeongyoon Eo and Jangho Seo and Joo Yeon Kim and Sanha Lee and Gyewon Lee and Taegeon Um and Haeyoon Cho and Byung-Gon Chun", title = "{Apache Nemo}: a Framework for Optimizing Distributed Data Processing", journal = j-TOCS, volume = "38", number = "3--4", pages = "5:1--5:31", month = nov, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3468144", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Apr 18 11:45:45 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3468144", abstract = "Optimizing scheduling and communication of distributed data processing for resource and data characteristics is crucial for achieving high performance. Existing approaches to such optimizations largely fall into two categories. First, distributed runtimes \ldots{}", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Canakci:2021:SMB, author = "Burcu Canakci and Robbert {Van Renesse}", title = "Scaling Membership of {Byzantine} Consensus", journal = j-TOCS, volume = "38", number = "3--4", pages = "6:1--6:31", month = nov, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3473138", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Apr 18 11:45:45 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3473138", abstract = "Scaling Byzantine Fault Tolerant (BFT) systems in terms of membership is important for secure applications with large participation such as blockchains. 
While traditional protocols have low latency, they cannot handle many processors. Conversely, \ldots{}", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Kumar:2021:SSF, author = "Rakesh Kumar and Boris Grot", title = "Shooting Down the Server Front-End Bottleneck", journal = j-TOCS, volume = "38", number = "3--4", pages = "7:1--7:30", month = nov, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3484492", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Mon Apr 18 11:45:45 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3484492", abstract = "The front-end bottleneck is a well-established problem in server workloads owing to their deep software stacks and large instruction footprints. Despite years of research into effective L1-I and BTB prefetching, state-of-the-art techniques force a trade-off \ldots{}", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Lyerly:2021:ORT, author = "Robert Lyerly and Carlos Bilbao and Changwoo Min and Christopher J.
Rossbach and Binoy Ravindran", title = "An {OpenMP} Runtime for Transparent Work Sharing across Cache-Incoherent Heterogeneous Nodes", journal = j-TOCS, volume = "39", number = "1--4", pages = "1:1--1:??", month = nov, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3505224", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Dec 8 06:35:07 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3505224", abstract = "In this work, we present libHetMP, an OpenMP runtime for automatically and transparently distributing parallel computation across heterogeneous \ldots{}", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Chen:2021:UHM, author = "Lei Chen and Jiacheng Zhao and Chenxi Wang and Ting Cao and John Zigman and Haris Volos and Onur Mutlu and Fang Lv and Xiaobing Feng and Guoqing Harry Xu and Huimin Cui", title = "Unified Holistic Memory Management Supporting Multiple Big Data Processing Frameworks over Hybrid Memories", journal = j-TOCS, volume = "39", number = "1--4", pages = "2:1--2:??", month = nov, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3511211", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Dec 8 06:35:07 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3511211", abstract = "To process real-world datasets, modern data-parallel systems often require extremely large amounts of memory, which are both costly and energy \ldots{}", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Boroujerdian:2021:RCA, author = "Behzad Boroujerdian and Hasan Genc and Srivatsan Krishnan and 
Bardienus Pieter Duisterhof and Brian Plancher and Kayvan Mansoorshahi and Marcelino Almeida and Wenzhi Cui and Aleksandra Faust and Vijay Janapa Reddi", title = "The Role of Compute in Autonomous Micro Aerial Vehicles: Optimizing for Mission Time and Energy Efficiency", journal = j-TOCS, volume = "39", number = "1--4", pages = "3:1--3:??", month = nov, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3511210", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Dec 8 06:35:07 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3511210", abstract = "Autonomous and mobile cyber-physical machines are becoming an inevitable part of our future. In particular, Micro Aerial Vehicles (MAVs) have seen a \ldots{}", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Blocher:2021:RAO, author = "Marcel Bl{\"o}cher and Emilio Coppa and Pascal Kleber and Patrick Eugster and William Culhane and Masoud Saeida Ardekani", title = "{ROME}: All Overlays Lead to Aggregation, but Some Are Faster than Others", journal = j-TOCS, volume = "39", number = "1--4", pages = "4:1--4:??", month = nov, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3516430", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Dec 8 06:35:07 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3516430", abstract = "Aggregation is common in data analytics and crucial to distilling information from large datasets, but current data analytics frameworks do not fully exploit \ldots{}", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Xing:2021:HCE, author = "Tong Xing and Antonio Barbalace and Pierre 
Olivier and Mohamed L. Karaoui and Wei Wang and Binoy Ravindran", title = "{H-Container}: Enabling Heterogeneous-{ISA} Container Migration in Edge Computing", journal = j-TOCS, volume = "39", number = "1--4", pages = "5:1--5:??", month = nov, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3524452", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Dec 8 06:35:07 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3524452", abstract = "Edge computing is a recent computing paradigm that brings cloud services closer to the client. Among other features, edge computing offers extremely low \ldots{}", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Xia:2021:BIP, author = "Yubin Xia and Dong Du and Zhichao Hua and Binyu Zang and Haibo Chen and Haibing Guan", title = "Boosting Inter-process Communication with Architectural Support", journal = j-TOCS, volume = "39", number = "1--4", pages = "6:1--6:??", month = nov, year = "2021", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3532861", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Dec 8 06:35:07 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3532861", abstract = "IPC (inter-process communication) is a critical mechanism for modern OSes, including not only microkernels such as seL4, QNX, and Fuchsia where system \ldots{}", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Diavastos:2022:EIS, author = "Andreas Diavastos and Trevor E. 
Carlson", title = "Efficient Instruction Scheduling Using Real-time Load Delay Tracking", journal = j-TOCS, volume = "40", number = "1--4", pages = "1:1--1:??", month = nov, year = "2022", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3548681", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Dec 8 06:35:07 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3548681", abstract = "Issue time prediction processors use dataflow dependencies and predefined instruction latencies to predict issue times of repeated instructions. In this work, \ldots{}", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Biswas:2022:UPR, author = "Arnab Kumar Biswas", title = "Using Pattern of On-Off Routers and Links and Router Delays to Protect Network-on-Chip Intellectual Property", journal = j-TOCS, volume = "40", number = "1--4", pages = "2:1--2:??", month = nov, year = "2022", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3548680", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Dec 8 06:35:07 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3548680", abstract = "Intellectual Property (IP) reuse is a well known practice in chip design processes. 
Nowadays, network-on-chips (NoCs) are increasingly used as IP and sold by \ldots{}", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Alkhatib:2023:PNP, author = "Basil Alkhatib and Sreeharsha Udayashankar and Sara Qunaibi and Ahmed Alquraan and Mohammed Alfatafta and Wael Al-Manasrah and Alex Depoutovitch and Samer Al-Kiswany", title = "Partial Network Partitioning", journal = j-TOCS, volume = "41", number = "1--4", pages = "1:1--1:??", month = nov, year = "2023", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3576192", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Feb 3 11:39:05 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3576192", abstract = "We present an extensive study focused on partial network partitioning. Partial network partitions disrupt the communication between some but not all nodes in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Sheff:2023:CRB, author = "Isaac Sheff and Xinwen Wang and Kushal Babel and Haobin Ni and Robbert van Renesse and Andrew C. 
Myers", title = "{Charlotte}: Reformulating Blockchains into a {Web} of Composable Attested Data Structures for Cross-Domain Applications", journal = j-TOCS, volume = "41", number = "1--4", pages = "2:1--2:??", month = nov, year = "2023", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3607534", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Feb 3 11:39:05 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3607534", abstract = "Cross-domain applications are rapidly adopting blockchain techniques for immutability, availability, integrity, and interoperability. However, for most \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Park:2023:FFM, author = "Jonggyu Park and Young Ik Eom", title = "Filesystem Fragmentation on Modern Storage Systems", journal = j-TOCS, volume = "41", number = "1--4", pages = "3:1--3:??", month = nov, year = "2023", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3611386", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Feb 3 11:39:05 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3611386", abstract = "Filesystem fragmentation has been one of the primary reasons for computer systems to get slower over time. However, there have been rapid changes in modern storage systems over the past decades, and modern storage devices such as solid state drives have different mechanisms to access data, compared with traditional rotational ones. In this article, we revisit filesystem fragmentation on modern computer systems from both performance and fairness perspectives. 
According to our extensive experiments, filesystem fragmentation not only degrades I/O performance of modern storage devices, but also incurs various problems related to I/O fairness, such as performance interference. Unfortunately, conventional defragmentation tools are designed primarily for hard disk drives and thus generate an unnecessarily large amount of I/Os for data migration. To mitigate such problems, this article presents FragPicker, a new defragmentation tool for modern storage devices. FragPicker analyzes the I/O behaviors of each target application and defragments only necessary pieces of data whose migration can contribute to performance improvement, thereby effectively minimizing the I/O amount for defragmentation. Our evaluation with YCSB workload-C shows FragPicker reduces the total amount of I/O for defragmentation by around 66\% and the elapsed time by around 84\%, while showing a similar level of defragmentation effect.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Pellauer:2023:SOS, author = "Michael Pellauer and Jason Clemons and Vignesh Balaji and Neal Crago and Aamer Jaleel and Donghyuk Lee and Mike O'Connor and Angshuman Parashar and Sean Treichler and Po-An Tsai and Stephen W. Keckler and Joel S. 
Emer", title = "{Symphony}: Orchestrating Sparse and Dense Tensors with Hierarchical Heterogeneous Processing", journal = j-TOCS, volume = "41", number = "1--4", pages = "4:1--4:??", month = nov, year = "2023", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3630007", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Feb 3 11:39:05 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3630007", abstract = "Sparse tensor algorithms are becoming widespread, particularly in the domains of deep learning, graph and data analytics, and scientific computing. Current \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Zhao:2023:MIB, author = "Jie Zhao and Jinchen Xu and Peng Di and Wang Nie and Jiahui Hu and Yanzhi Yi and Sijia Yang and Zhen Geng and Renwei Zhang and Bojie Li and Zhiliang Gan and Xuefeng Jin", title = "Modeling the Interplay between Loop Tiling and Fusion in Optimizing Compilers Using Affine Relations", journal = j-TOCS, volume = "41", number = "1--4", pages = "5:1--5:??", month = nov, year = "2023", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3635305", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Feb 3 11:39:05 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3635305", abstract = "Loop tiling and fusion are two essential transformations in optimizing compilers to enhance the data locality of programs. 
Existing heuristics either perform loop tiling and fusion in a particular order, missing some of their profitable compositions, or execute ad-hoc implementations for domain-specific applications, calling for a generalized and systematic solution in optimizing compilers.\par In this article, we present a so-called {\em basteln\/} (an abbreviation for backward slicing of tiled loop nests) strategy in polyhedral compilation to better model the interplay between loop tiling and fusion. The basteln strategy first groups loop nests by preserving their parallelism\slash tilability and next performs rectangular\slash parallelogram tiling to the output groups that produce data consumed outside the considered program fragment. The memory footprints required by each tile are then computed, from which the upward exposed data are extracted to determine the tile shapes of the remaining fusion groups. Such a tiling mechanism can construct complex tile shapes imposed by the dependences between these groups, which are further merged by a post-tiling fusion algorithm for enhancing data locality without losing the parallelism\slash tilability of the output groups. The basteln strategy also takes into account the amount of redundant computations and the fusion of independent groups, exhibiting a general applicability.\par We integrate the basteln strategy into two optimizing compilers, with one a general-purpose optimizer and the other a domain-specific compiler for deploying deep learning models. The experiments are conducted on CPU, GPU, and a deep learning accelerator to demonstrate the effectiveness of the approach for a wide class of application domains, including deep learning, image processing, sparse matrix computation, and linear algebra. 
In particular, the basteln strategy achieves a mean speedup of $ 1.8 \times $ over cuBLAS\slash cuDNN and $ 1.1 \times $ over TVM on GPU when used to optimize deep learning models; it also outperforms PPCG and TVM by 11\% and 20\%, respectively, when generating code for the deep learning accelerator.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Luo:2024:ORM, author = "Shutian Luo and Chenyu Lin and Kejiang Ye and Guoyao Xu and Liping Zhang and Guodong Yang and Huanle Xu and Chengzhong Xu", title = "Optimizing Resource Management for Shared Microservices: a Scalable System Design", journal = j-TOCS, volume = "42", number = "1--2", pages = "1:1--1:??", month = may, year = "2024", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3631607", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu May 16 10:49:47 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3631607", abstract = "A common approach to improving resource utilization in data centers is to adaptively provision resources based on the actual workload. One fundamental challenge of doing this in microservice management frameworks, however, is that different components of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. 
Syst.", articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Zhao:2024:CDC, author = "Laiping Zhao and Yushuai Cui and Yanan Yang and Xiaobo Zhou and Tie Qiu and Keqiu Li and Yungang Bao", title = "Component-distinguishable Co-location and Resource Reclamation for High-throughput Computing", journal = j-TOCS, volume = "42", number = "1--2", pages = "2:1--2:??", month = may, year = "2024", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3630006", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu May 16 10:49:47 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3630006", abstract = "Cloud service providers improve resource utilization by co-locating latency-critical (LC) workloads with best-effort batch (BE) jobs in datacenters. However, they usually treat multi-component LCs as monolithic applications and treat BEs as ``second-class \ldots{}''", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Kappes:2024:DFU, author = "Giorgos Kappes and Stergios V. Anastasiadis", title = "{Diciclo}: Flexible User-level Services for Efficient Multitenant Isolation", journal = j-TOCS, volume = "42", number = "1--2", pages = "3:1--3:??", month = may, year = "2024", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3639404", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu May 16 10:49:47 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "https://dl.acm.org/doi/10.1145/3639404", abstract = "Containers are a mainstream virtualization technique for running stateful workloads over persistent storage. 
In highly utilized multitenant hosts, resource contention at the system kernel leads to inefficient container input/output (I/O) handling. Although there are interesting techniques to address this issue, they incur high implementation complexity and execution overhead. As a cost-effective alternative, we introduce the Diciclo architecture with our assumptions, goals, and principles. For each tenant, Diciclo isolates the control and data I/O path at user level and runs dedicated storage systems. Diciclo includes the libservice unified user-level abstraction of system services and the node structure design pattern for the application and server side. We prototyped a toolkit of user-level components that comprise the library to invoke the standard I/O calls, the I/O communication mechanism, and the I/O services. Based on Diciclo, we built Danaus, a filesystem client that integrates a union filesystem with a Ceph distributed filesystem client and configurable shared cache. Across different host configurations, workloads, and systems, Danaus achieves improved performance stability, because it handles I/O with reserved per-tenant resources and avoids intensive kernel locking. Based on having built and evaluated Danaus, we share valuable lessons about resource contention, file management, service separation, and performance stability in multitenant systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. 
Syst.", articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Sha:2024:HSC, author = "Sai Sha and Chuandong Li and Xiaolin Wang and Zhenlin Wang and Yingwei Luo", title = "Hardware--Software Collaborative Tiered-Memory Management Framework for Virtualization", journal = j-TOCS, volume = "42", number = "1--2", pages = "4:1--4:??", month = may, year = "2024", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3639564", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu May 16 10:49:47 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "https://dl.acm.org/doi/10.1145/3639564", abstract = "The tiered-memory system can effectively expand the memory capacity for virtual machines (VMs). However, virtualization introduces new challenges specifically in enforcing performance isolation, minimizing context switching, and providing resource overcommit. None of the state-of-the-art designs consider virtualization and address these challenges; we observe that a VM with tiered memory incurs up to a $ 2 \times $ slowdown compared to a DRAM-only VM.\par We propose vTMM, a hardware-software collaborative tiered-memory management framework for virtualization. A key insight in vTMM is to leverage the unique system features in virtualization to meet the above challenges. vTMM automatically determines page hotness and migrates pages between fast and slow memory to achieve better performance. Specially, vTMM optimizes page tracking and migration based on page-modification logging (PML), a hardware-assisted virtualization mechanism, and adaptively distinguishes hot/cold pages through the page ``temperature'' sorting. vTMM also dynamically adjusts fast memory among multi-VMs on demand by using a memory pool. 
Further, vTMM tracks huge pages at regular-page granularity in hardware and splits/merges pages in software, realizing hybrid-grained page management and optimization. We implement and evaluate vTMM with single-grained page management on an Intel processor, and the hybrid-grained page management on a Sunway processor with hardware mode supporting hardware/software co-designs. Experiments show that vTMM outperforms existing tiered-memory management designs in virtualization.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{vanRenesse:2024:E, author = "Robbert van Renesse and Sam H. Noh", title = "Editorial", journal = j-TOCS, volume = "42", number = "3--4", pages = "5:1--5:??", month = nov, year = "2024", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3696656", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Nov 22 08:34:05 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3696656", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. 
Syst.", articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Lin:2024:THP, author = "Hao Lin and Zhenhua Li and Di Gao and Yunhao Liu and Feng Qian and Tianyin Xu and Bo Xiao and Xiaokang Qin", title = "{Trinity}: High-Performance and Reliable Mobile Emulation through Graphics Projection", journal = j-TOCS, volume = "42", number = "3--4", pages = "6:1--6:??", month = nov, year = "2024", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3643029", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Nov 22 08:34:05 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3643029", abstract = "Mobile emulation, which creates full-fledged software mobile devices on a physical PC/server, is pivotal to the mobile ecosystem. Unfortunately, existing mobile emulators perform poorly on graphics-intensive apps in terms of efficiency and compatibility. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "6", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Dang:2024:PHA, author = "Zheng Dang and Shuibing He and Xuechen Zhang and Peiyi Hong and Zhenxin Li and Xinyu Chen and Haozhe Song and Xian-He Sun and Gang Chen", title = "{PMAlloc}: a Holistic Approach to Improving Persistent Memory Allocation", journal = j-TOCS, volume = "42", number = "3--4", pages = "7:1--7:??", month = nov, year = "2024", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3643886", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Nov 22 08:34:05 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3643886", abstract = "Persistent memory allocation is a fundamental building block for developing high-performance and in-memory applications. 
Existing persistent memory allocators suffer from many performance issues. First, they may introduce repeated cache line flushes and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "7", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Jia:2024:BTD, author = "Zhipeng Jia and Emmett Witchel", title = "{Boki}: Towards Data Consistency and Fault Tolerance with Shared Logs in Stateful Serverless Computing", journal = j-TOCS, volume = "42", number = "3--4", pages = "8:1--8:??", month = nov, year = "2024", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3653072", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Nov 22 08:34:05 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3653072", abstract = "Boki is a new serverless runtime that exports a shared log API to serverless functions. Boki shared logs enable stateful serverless applications to manage their state with durability, consistency, and fault tolerance. Boki shared logs achieve high \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. 
Syst.", articleno = "8", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Li:2024:SEB, author = "Tuo Li and Jia-Ju Bai and Yulei Sui and Shi-Min Hu", title = "{SPATA}: Effective {OS} Bug Detection with Summary-Based, Alias-Aware, and Path-Sensitive Typestate Analysis", journal = j-TOCS, volume = "42", number = "3--4", pages = "9:1--9:??", month = nov, year = "2024", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3695250", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Fri Nov 22 08:34:05 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", URL = "https://dl.acm.org/doi/10.1145/3695250", abstract = "The operating system (OS) is the cornerstone for computer systems. It manages hardware and provides fundamental service for user-level applications. Thus, detecting bugs in OSes is important to improve the reliability of computer systems. Static typestate \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "9", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Li:2025:FBM, author = "Changlong Li and Zongwei Zhu and Chun Jason Xue and Yu Liang and Rachata Ausavarungnirun and Liang Shi and Xuehai Zhou", title = "Freezing-based Memory and Process Co-design for User Experience on Resource-limited Mobile Devices", journal = j-TOCS, volume = "43", number = "1--2", pages = "1:1--1:??", month = may, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3714409", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Jun 14 09:34:50 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Mobile devices with limited resources are prevalent, as they have a relatively low price. Providing a good user experience with limited resources has been a big challenge. 
This work finds that foreground applications are often unexpectedly interfered by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Fan:2025:XHP, author = "Shulin Fan and Zhichao Hua and Yubin Xia and Haibo Chen", title = "{XpuTEE}: a High-Performance and Practical Heterogeneous Trusted Execution Environment for {GPUs}", journal = j-TOCS, volume = "43", number = "1--2", pages = "2:1--2:??", month = may, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3719653", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Jun 14 09:34:50 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "AI applications are employed in diverse scenarios, including data centers, personal computers, smart cars, and so on. Their privacy is threatened by the intricate software stacks and the potential malfeasance of system maintainers. The Trusted Execution \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Qi:2025:EFT, author = "Sheng Qi and Haoyu Feng and Xuanzhe Liu and Xin Jin", title = "Efficient Fault Tolerance for Stateful Serverless Computing with Asymmetric Logging", journal = j-TOCS, volume = "43", number = "1--2", pages = "3:1--3:??", month = may, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3725985", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Jun 14 09:34:50 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Serverless computing separates function execution from state management. Simple retry-based fault tolerance might corrupt the shared state with duplicate updates. 
Existing solutions employ log-based fault tolerance to achieve exactly-once semantics, where \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Guo:2025:RIA, author = "Ruorong Guo and Yangye Zhou and Jinyan Xu and Wenbo Shen and Yajin Zhou and Rui Chang", title = "{RegVault II}: Achieving Hardware-Assisted Selective Kernel Data Randomization for Multiple Architectures", journal = j-TOCS, volume = "43", number = "1--2", pages = "4:1--4:??", month = may, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3734521", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Sat Jun 14 09:34:50 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/risc-v.bib; https://www.math.utah.edu/pub/tex/bib/tocs.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", abstract = "Memory corruption vulnerabilities pose a significant threat to system security. The traditional paging-based approach cannot protect fine-grained runtime data (e.g., function pointers), which are often mixed with other data in memory. To protect the runtime data, data space randomization is proposed to encrypt the in-memory data so that the attacker cannot control the decrypted result. Unfortunately, current hardware does not provide dedicated support for fine-grained data encryption.\par This article presents RegVault II, a cross-architectural hardware-assisted lightweight data randomization scheme for OS kernels. To achieve robust, fine-grained, and lightweight data protection, we first identify five required capabilities for efficient and secure data randomization. 
Guided by these requirements, we design and implement novel hardware primitives that provide cryptographically strong encryption and decryption, thus ensuring both confidentiality and integrity for register-grained data. At the software level, we propose identification- and annotation-based approaches to automatically mark sensitive data and instrument the corresponding load and store operations. We also introduce new techniques to protect the interrupt context and safeguard the sensitive data spilling. We implement RegVault II on an actual FPGA hardware board for RISC-V and on QEMU for Arm, applying it to protect six types of sensitive data in the Linux kernel. Our thorough security and performance evaluations show that RegVault II effectively defends against a broad range of kernel data attacks while incurring minimal performance overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Flinn:2025:ISS, author = "Jason Flinn and Margo Seltzer", title = "Introduction to the Special Section on {SOSP 2023}", journal = j-TOCS, volume = "43", number = "3", pages = "5:1--5:2", month = aug, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3744676", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Oct 2 10:43:56 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. 
Syst.", articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Li:2025:VJC, author = "Cong Li and Yanyan Jiang and Chang Xu and Zhendong Su", title = "Validating {JIT} Compilers via Compilation Space Exploration", journal = j-TOCS, volume = "43", number = "3", pages = "6:1--6:37", month = aug, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3715102", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Oct 2 10:43:56 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "We introduce the concept of compilation space as a new pivot for the comprehensive validation of just-in-time (JIT) compilers in modern language virtual machines (LVMs). The compilation space of a program encompasses a wide range of equivalent JIT-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "6", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Dong:2025:WSP, author = "Mingkai Dong and Fangnuo Wu and Gequan Mo and Haibo Chen", title = "Whole-system Persistence Made Efficient with Tree-structured Checkpointing on Microkernel", journal = j-TOCS, volume = "43", number = "3", pages = "7:1--7:29", month = aug, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3742425", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Oct 2 10:43:56 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Whole-system persistence promises simplified application deployment and near-instantaneous recovery. This can be implemented using single-level store (SLS) through periodic checkpointing of ephemeral state to persistent devices. However, traditional SLSs \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. 
Syst.", articleno = "7", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Li:2025:LGC, author = "Shengkai Li and Chenxi Wang and Haonan Xue and Haoran Ma and Shi Liu and Yifan Qiao and Jonathan Eyolfson and Christian Navasca and Shan Lu and Harry Xu", title = "Lining up Garbage Collection and Application for a Far-Memory-Friendly Runtime", journal = j-TOCS, volume = "43", number = "3", pages = "8:1--8:32", month = aug, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3749283", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Oct 2 10:43:56 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Far-memory techniques that enable applications to use remote memory are increasingly appealing in modern data centers, supporting applications' large memory footprint and improving machines' resource utilization. Unfortunately, most far-memory techniques \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "8", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Ramdas:2025:COS, author = "Abishek Ramdas and David Cock and Michael Giardino and Dario Korolija and Anastasiia Ruzhanskaia and Daniel Schwyn and Adam Turowski and Gustavo Alonso and Timothy Roscoe", title = "{CCKit}: an Open-Source Toolkit for Cache Coherent Accelerators", journal = j-TOCS, volume = "43", number = "3", pages = "9:1--9:30", month = aug, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3763790", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Oct 2 10:43:56 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "The trend toward system specialization is leading to a proliferation of accelerators, exposing interconnects as serious bottlenecks, both in functionality and performance. 
As a result, several alternative approaches have been proposed which promise to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "9", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Schroeder:2025:ISS, author = "Bianca Schroeder and Mark Silberstein", title = "Introduction to the Special Section on {EuroSys 2024}", journal = j-TOCS, volume = "43", number = "4", pages = "10:1--10:2", month = nov, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3768140", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 5 11:24:43 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "10", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Wei:2025:TSD, author = "Xingda Wei and Fangming Lu and Zhuobin Huang and Rong Chen and Mingyu Wu and Haibo Chen", title = "Towards Serialization\slash Deserialization-free State Transfer in Serverless Workflows", journal = j-TOCS, volume = "43", number = "4", pages = "11:1--11:32", month = nov, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3725986", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 5 11:24:43 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Serialization and deserialization dominate the state transfer time of serverless workflows, leading to substantial performance penalties when executing various serverless workflow applications. We identify the key reason for serialization and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. 
Syst.", articleno = "11", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Sajal:2025:TFS, author = "Sultan Mahmud Sajal and Md Salman Estyak and Rubaba Hasan and Timothy Zhu and Bhuvan Urgaonkar and Siddhartha Sen", title = "{TraceScaler}: a Framework for Scaling Load in Real-World Traces for System Evaluation", journal = j-TOCS, volume = "43", number = "4", pages = "12:1--12:31", month = nov, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3760774", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 5 11:24:43 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Trace replay is a common approach for evaluating systems by rerunning historical traffic patterns, but it's not always possible to find suitable real-world traces at the desired level of system load. To experiment with different loads, one needs to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "12", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Xiao:2025:EAO, author = "Yunming Xiao and Yanqi Gu and Yibo Zhao and Sen Lin and Aleksandar Kuzmanovic", title = "Enabling Anonymous Online Streaming Analytics at the Network Edge", journal = j-TOCS, volume = "43", number = "4", pages = "13:1--13:39", month = nov, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3746130", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 5 11:24:43 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "In recent years, content hyper-giants have increasingly deployed server infrastructure and services close to end-users within ``eyeball'' networks. Still, online streaming analytics has largely remained unaffected by this trend. 
This is despite the fact \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "13", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Mahmud:2025:ACD, author = "Tabassum Mahmud and Om Rameshwar Gatla and Duo Zhang and Carson Love and Ryan Bumann and Varun Girimaji and Mai Zheng", title = "Analyzing Configuration Dependencies of File Systems", journal = j-TOCS, volume = "43", number = "4", pages = "14:1--14:28", month = nov, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3747177", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 5 11:24:43 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "File systems play an essential role in modern society for managing precious data. To meet diverse needs, they often support many configuration parameters. Such flexibility comes at the price of additional complexity which can lead to subtle configuration-. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "14", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Yang:2025:LLC, author = "Zhenkun Yang and Chen Qian and Xuwang Teng and Fanyu Kong and Fusheng Han and Quanqing Xu and Daokun Hu", title = "{LCL+}: a Lock Chain Length-based Distributed Deadlock Detection and Resolution Service Built for {OceanBase}", journal = j-TOCS, volume = "43", number = "4", pages = "15:1--15:33", month = nov, year = "2025", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3768621", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 5 11:24:43 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "The problem of deadlock detection and resolution in database systems has been studied for decades. 
Although it has long been a mature feature of classical centralized database systems for many years, its use in distributed database systems remains in its \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "15", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Noh:2026:ECM, author = "Sam H. Noh and Robbert van Renesse", title = "{Editor-in-Chief}'s Message", journal = j-TOCS, volume = "44", number = "1", pages = "1e:1--1e:4", month = feb, year = "2026", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3798064", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 5 11:24:44 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "1e", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Han:2026:RTW, author = "Mingcong Han and Rong Chen and Weihang Shen and Hanze Zhang and Jinrong Yang and Haibo Chen", title = "Real-time, Work-conserving {GPU} Scheduling for Concurrent {DNN} Inference", journal = j-TOCS, volume = "44", number = "1", pages = "1:1--1:42", month = feb, year = "2026", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3768622", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 5 11:24:44 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Many intelligent applications, such as autonomous driving and virtual reality, require running both latency-critical (real-time) and best-effort deep neural network (DNN) inference tasks to achieve both real-time and work-conserving on the GPU. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. 
Syst.", articleno = "1", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Jin:2026:REK, author = "Chao Jin and Zili Zhang and Xuanlin Jiang and Fangyue Liu and Shufan Liu and Xuanzhe Liu and Xin Jin", title = "{RAGCache}: Efficient Knowledge Caching for Retrieval-Augmented Generation", journal = j-TOCS, volume = "44", number = "1", pages = "2:1--2:27", month = feb, year = "2026", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3768628", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 5 11:24:44 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Retrieval-Augmented Generation (RAG) has demonstrated substantial advancements in various natural language processing tasks by integrating the strengths of large language models (LLMs) and external knowledge databases. However, the retrieval step \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "2", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Song:2026:UNO, author = "Xiaoniu Song and Rong Chen and Haitao Song and Yiwen Zhang and Haibo Chen", title = "Unified and Near-optimal Multi-{GPU} Cache for Embedding-based Deep Learning", journal = j-TOCS, volume = "44", number = "1", pages = "3:1--3:32", month = feb, year = "2026", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3767725", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 5 11:24:44 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "This article presents UGache , a unified multi-GPU cache system designed for embedding-based deep learning (EmbDL). UGache is primarily motivated by the unique characteristics of EmbDL applications, namely read-only and skewed embedding accesses with \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. 
Syst.", articleno = "3", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Li:2026:CSS, author = "Chuandong Li and Zhe Tang and Dong Liu and Zhihong Xue and Xiaolin Wang and Zhenlin Wang and Yingwei Luo and Diyu Zhou", title = "A Comprehensive Study on Solving Memory Bloat Under Virtualization", journal = j-TOCS, volume = "44", number = "1", pages = "4:1--4:28", month = feb, year = "2026", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3769429", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 5 11:24:44 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Huge pages are effective in reducing address translation overhead under virtualization. However, huge pages can lead to the memory bloat problem, which manifests in two primary forms: hot bloat and usage bloat . Hot bloat occurs when accesses to a huge \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "4", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Jeong:2026:EDM, author = "Jinwoo Jeong and Jeongseob Ahn", title = "An Efficient {DNN} Model Serving System using Layer-wise Caching and Direct-Host-Access", journal = j-TOCS, volume = "44", number = "1", pages = "5:1--5:21", month = feb, year = "2026", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3774909", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 5 11:24:44 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "With the increasing demand to utilize deep neural networks (DNNs) in online services, it is important to serve DNN models on GPUs in a cost-effective manner. Once the required DNN model is ready in the GPU memory, we can immediately serve the inference \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. 
Syst.", articleno = "5", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } @Article{Zhang:2026:AMS, author = "Hanze Zhang and Rong Chen and Zihan Tang and Ke Cheng and Haibo Chen", title = "Accelerating Million-scale In-network Lock Management using Lock Fission", journal = j-TOCS, volume = "44", number = "1", pages = "6:1--6:33", month = feb, year = "2026", CODEN = "ACSYEC", DOI = "https://doi.org/10.1145/3774945", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Mar 5 11:24:44 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib", abstract = "Distributed lock services are extensively utilized in distributed systems to serialize concurrent accesses to shared resources. The need for fast and scalable lock services has become more pronounced with decreasing task execution times and expanding \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Comput. Syst.", articleno = "6", fjournal = "ACM Transactions on Computer Systems", journal-URL = "https://dl.acm.org/loi/tocs", } %%% ==================================================================== %%% Proceedings entries must come last: @Proceedings{ACM:1988:ASS, editor = "ACM", booktitle = "{1988 ACM\slash SIGOPS Symposium on Operating Systems Principles}", title = "{1988 ACM\slash SIGOPS Symposium on Operating Systems Principles}", volume = "6(1)", publisher = pub-ACM, address = pub-ACM:adr, pages = "??--??", month = feb, year = "1988", CODEN = "ACSYEC", ISSN = "0734-2071 (print), 1557-7333 (electronic)", ISSN-L = "0734-2071", bibdate = "Thu Jan 14 06:47:30 MST 1999", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/tocs.bib", series = j-TOCS, abstract = "This issue contains 6 conference papers. 
The topics covered are: stored-voice management in the Etherphone system; 801 storage; scale and performance of a distributed file system; recovery performance in QuickSilver; fine-grained mobility in the Emerald system; caching in the Sprite network file system.", acknowledgement = ack-nhfb, classification = "723", conference = "1988 ACM\slash SIGOPS Symposium on Operating Systems Principles.", keywords = "801 storage; computer architecture; computer networks; computer operating systems; computer systems, digital --- Distributed; Emerald system; Etherphone system; QuickSilver; Sprite network file system", pagecount = "154", sponsor = "ACM, Special Interest Group on Operating Systems, New York, NY, USA", }